Feat C++ API
A feature engineering automation tool
auto_backprop.cc

#include "auto_backprop.h"

namespace FT {

namespace Opt{
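
    // Constructor: looks up the cost function (cost_func) and its derivative
    // (d_cost_func) by scorer name, and stores the backprop hyperparameters:
    // iters (number of iterations), n (starting learning rate, annealed down
    // toward epT = 0.01*n), and a (passed through to NodeDx::update along
    // with the learning rate).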
    AutoBackProp::AutoBackProp(string scorer, int iters, float n, float a)
    {
        /* this->program = program.get_data(); */
        score_hash["mse"] = &Eval::squared_difference;
        score_hash["log"] = &Eval::log_loss;
        score_hash["multi_log"] = &Eval::multi_log_loss;
        score_hash["fpr"] = &Eval::log_loss;
        score_hash["zero_one"] = &Eval::log_loss;

        d_score_hash["mse"] = &Eval::d_squared_difference;
        d_score_hash["log"] = &Eval::d_log_loss;
        d_score_hash["multi_log"] = &Eval::d_multi_log_loss;
        d_score_hash["fpr"] = &Eval::d_log_loss;
        d_score_hash["zero_one"] = &Eval::d_log_loss;

        this->d_cost_func = d_score_hash.at(scorer);
        this->cost_func = score_hash.at(scorer);

        /* this->X = X; */
        /* this->labels = labels; */
        this->iters = iters;
        this->n = n;
        this->epT = 0.01*this->n; // min learning rate
        this->a = a;
    }

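    // prints a program as a sequence of "( name, w0, w1, ... )" tokens,
    // including the current weights of differentiable (NodeDx) nodes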
    void AutoBackProp::print_weights(NodeVector& program)
    {
        for (const auto& p : program)
        {
            cout << "( " << p->name;
            if (p->isNodeDx())
            {
                NodeDx* dNode = dynamic_cast<NodeDx*>(p.get());
                for (int i = 0; i < dNode->arity.at('f'); i++)
                {
                    cout << "," << dNode->W.at(i);
                }
                dNode = nullptr;
            }
            cout << " ) ";
        }
        /* cout << "\n"; */
    }

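    // runs backprop on ind's program: the data is split 50/50 into training
    // and validation folds; each iteration draws a training batch, evaluates
    // the program (forward pass), fits the ML model to its output Phi,
    // backpropagates the cost gradient through each differentiable root
    // subtree, and checks validation loss for early stopping; the weights
    // with the best validation loss are restored at the end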
    void AutoBackProp::run(Individual& ind, const Data& d,
            const Parameters& params)
    {
        vector<size_t> roots = ind.program.roots();
        float min_loss;
        float current_loss, current_val_loss;
        vector<vector<float>> best_weights;
        // split up the data so we have a validation set
        DataRef BP_data(d.X, d.y, d.Z, d.classification);
        BP_data.train_test_split(true, 0.5);
        // set up batch data
        MatrixXf Xb, Xb_v;
        VectorXf yb, yb_v;
        LongData Zb, Zb_v;
        /* cout << "y: " << d.y.transpose() << "\n"; */
        Data batch_data(Xb, yb, Zb, params.classification);
        /* Data db_val(Xb_v, yb_v, Zb_v, params.classification); */
        /* db_val.set_validation(); // make this a validation set */
        // if batch size is 0, set batch to 20% of the training data
        int batch_size = params.bp.batch_size > 0 ?
            params.bp.batch_size : 0.2*BP_data.t->y.size();
        /* d.get_batch(db_val, ); // draw a batch for the validation data */
        // number of iterations to allow validation fitness to not improve
        int patience = 3;
        int missteps = 0;

        this->epk = n; // starting learning rate
        /* logger.log("running backprop on " + ind.get_eqn(), 2); */
        logger.log("=========================", 4);
        logger.log("Iteration,Train Loss,Val Loss,Weights", 4);
        logger.log("=========================", 4);
        for (int x = 0; x < this->iters; x++)
        {
            logger.log("get batch", 3);
            // get batch data for training
            BP_data.t->get_batch(batch_data, batch_size);
            /* cout << "batch_data.y: " << batch_data.y.transpose() << "\n"; */
            // evaluate forward pass
            MatrixXf Phi;
            logger.log("forward pass", 3);
            vector<Trace> stack_trace = forward_prop(ind, batch_data,
                    Phi, params);
            // evaluate ML model on Phi
            bool pass = true;
            auto ml = std::make_shared<ML>(params.ml, true,
                    params.classification, params.n_classes);

            logger.log("ml fit", 3);
            shared_ptr<CLabels> yhat = ml->fit(Phi,
                    batch_data.y, params, pass, ind.dtypes);

            if (!pass || stack_trace.size() == 0)
                break;

            vector<float> Beta = ml->get_weights();
            current_loss = this->cost_func(batch_data.y, yhat,
                    params.class_weights).mean();

            // evaluate backward pass
            size_t s = 0;
            for (unsigned i = 0; i < stack_trace.size(); ++i)
            {
                while (!ind.program.at(roots.at(s))->isNodeDx()) ++s;
                /* cout << "running backprop on " << ind.get_eqn()
                 * << " from " << roots.at(s) << " to "
                 * << ind.program.subtree(roots.at(s)) << "\n"; */
                backprop(stack_trace.at(i), ind.program,
                        ind.program.subtree(roots.at(s)),
                        roots.at(s), Beta.at(s), // /ml->N.scale.at(s),
                        yhat, batch_data, params.class_weights);
            }

            // check validation fitness for early stopping
            MatrixXf Phival = ind.out((*BP_data.v));
            logger.log("checking validation fitness", 3);
            /* cout << "Phival: " << Phival.rows() << " x " << Phival.cols() << "\n"; */
            shared_ptr<CLabels> y_val = ml->predict(Phival);
            current_val_loss = this->cost_func(BP_data.v->y, y_val,
                    params.class_weights).mean();
            if (x == 0 || current_val_loss < min_loss)
            {
                min_loss = current_val_loss;
                best_weights = ind.program.get_weights();
            }
            else
            {
                ++missteps;
                /* cout << "missteps: " << missteps << "\n"; */
                logger.log("", 3);
            }
            // early stopping trigger
            if (missteps == patience
                    || std::isnan(min_loss)
                    || std::isinf(min_loss)
                    || min_loss <= NEAR_ZERO)
                break;
            else
                logger.log("min loss: " + std::to_string(min_loss), 3);

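            // anneal the learning rate: blend epk toward the floor epT, with
            // the pull toward epT growing as x approaches iters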
            float alpha = float(x)/float(iters);
            this->epk = (1 - alpha)*this->epk + alpha*this->epT;
            /* this->epk = this->epk + this->epT; */
            /* cout << "epk: " << this->epk << "\n"; */
            if (params.verbosity > 3)
            {
                cout << x << ","
                     << current_loss << ","
                     << current_val_loss << ",";
                print_weights(ind.program);
            }
        }
        logger.log("", 4);
        logger.log("=========================", 4);
        logger.log("done=====================", 4);
        logger.log("=========================", 4);
        ind.program.set_weights(best_weights);
    }

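    // forward pass: evaluate the program on the batch data, filling Phi with
    // the program's output matrix and recording a Trace of intermediate node
    // outputs for each root subtree so the backward pass can reuse them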
    vector<Trace> AutoBackProp::forward_prop(Individual& ind, const Data& d,
            MatrixXf& Phi, const Parameters& params)
    {
        // iterate through all the nodes, evaluating and tracking outputs
        vector<Trace> stack_trace;
        Phi = ind.out_trace(d, stack_trace);
        // use stack_f and the execution stack to avoid the issue of branches
        // affecting which elements appear before a node
        return stack_trace;
    }

    // Updates stacks to have proper value on top.
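    // After a subtree is exhausted, this pops finished parents off `executing`
    // until one with an unexplored child branch is found, then re-queues that
    // parent on bp_program and pushes its next stored derivative onto the
    // gradient stack.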
    void AutoBackProp::next_branch(vector<BP_NODE>& executing, vector<Node*>& bp_program,
            vector<ArrayXf>& derivatives)
    {
        // while there are still nodes with branches to explore
        if (!executing.empty())
        {
            // declare variable to hold a node and its associated derivatives
            BP_NODE bp_node = pop<BP_NODE>(&executing); // check first element
            // loop until a branch to explore is found
            while (bp_node.deriv_list.empty() && !executing.empty())
            {
                bp_node = pop<BP_NODE>(&executing); // get node and its derivatives
                // for some reason this function is not removing the element from the stack
                pop<ArrayXf>(&derivatives); // remove associated gradients from stack
                if (executing.empty())
                    return;
            }

            // should now have the next parent node and its derivatives (stored in bp_node)
            if (!bp_node.deriv_list.empty())
            {
                bp_program.push_back(bp_node.n);
                // pull the derivative from the front of the list, due to how we stored them earlier
                derivatives.push_back(pop_front<ArrayXf>(&(bp_node.deriv_list)));
                // push it back on the stack in order to sync all the stacks
                executing.push_back(bp_node);
            }
        }
    }

    // Compute gradients and update weights.
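    // The gradient is seeded with d(cost)/d(yhat) scaled by Beta (the weight
    // the ML model assigned to this subtree's output), then chained down the
    // tree: each differentiable node computes derivatives wrt its arguments
    // and applies a weight update at the current learning rate epk.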
    void AutoBackProp::backprop(Trace& stack, NodeVector& program, int start, int end,
            float Beta, shared_ptr<CLabels>& yhat,
            const Data& d,
            vector<float> sw)
    {
        vector<ArrayXf> derivatives;
        // start with the derivative of the cost function wrt the ML output,
        // times dyhat/d(program output), which is equal to the weight the
        // model assigned to this subprogram (Beta)
        derivatives.push_back(this->d_cost_func(d.y, yhat, sw).array() * Beta);
        /* cout << "Cost derivative: " << derivatives[derivatives.size()-1] << "\n"; */
        vector<BP_NODE> executing; // stores nodes and their associated derivatives
        // Currently I don't think updates will be saved; might want a pointer
        // to nodes so we don't have to restock the list.
        // program we loop through and edit during the algorithm
        vector<Node*> bp_program = program.get_data(start, end);
        while (bp_program.size() > 0)
        {
            Node* node = pop<Node*>(&bp_program);
            /* cout << "Evaluating: " << node->name << "\n"; */
            vector<ArrayXf> n_derivatives;

            if (node->isNodeDx() && node->visits == 0 && node->arity.at('f') > 0)
            {
                // could probably put this up one level and have the if condition check for null
                NodeDx* dNode = dynamic_cast<NodeDx*>(node);
                // calculate all the derivatives and store them, then update
                // all the weights and throw away the node
                for (int i = 0; i < node->arity.at('f'); i++)
                {
                    dNode->derivative(n_derivatives, stack, i);
                }
                dNode->update(derivatives, stack, this->epk, this->a);
                // get rid of the input arguments for the node
                for (int i = 0; i < dNode->arity.at('f'); i++)
                    pop<ArrayXf>(&stack.f);
                for (int i = 0; i < dNode->arity.at('b'); i++)
                    pop<ArrayXb>(&stack.b);
                for (int i = 0; i < dNode->arity.at('c'); i++)
                    pop<ArrayXi>(&stack.c);
                if (!n_derivatives.empty())
                    derivatives.push_back(pop_front<ArrayXf>(&n_derivatives));

                executing.push_back({dNode, n_derivatives});
            }
            // choosing how to move through the tree
            if (node->arity.at('f') == 0 || !node->isNodeDx())
            {
                // clean up gradients and find the parent node
                if (!derivatives.empty())
                    pop<ArrayXf>(&derivatives); // TODO: check if this fixed it
                next_branch(executing, bp_program, derivatives);
            }
            else
            {
                node->visits += 1;
                if (node->visits > node->arity.at('f'))
                    next_branch(executing, bp_program, derivatives);
            }
        }

        // point bp_program entries to null
        for (unsigned i = 0; i < bp_program.size(); ++i)
            bp_program.at(i) = nullptr;

        /* cout << "Backprop terminated\n"; */
        /* print_weights(program); */
    }
}
}

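/* Usage sketch (illustrative; `ind`, `d`, and `params` are assumed to be an
 * Individual, a Data object, and a Parameters instance configured elsewhere).
 * The scorer key must be one of the entries registered in score_hash:
 *
 *     FT::Opt::AutoBackProp bp("mse", 1000, 0.1, 0.9);
 *     bp.run(ind, d, params);   // adapts the weights of ind's program
 */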