Feat C++ API
A feature engineering automation tool
individual.cc
Go to the documentation of this file.
1 /* FEAT
2 copyright 2017 William La Cava
3 license: GNU/GPL v3
4 */
5 
6 #include "individual.h"
7 
8 namespace FT{
9 namespace Pop{
10 
// Individual default constructor body (the signature line was lost in
// extraction): resets all score/bookkeeping members to -1 sentinels
// ("not yet evaluated") and clears parent/probability state.
12 {
13  complexity = 0;
14  dim = 0;
// a single parent id of -1 marks "no parent"
15  parent_id.clear();
16  parent_id.push_back(-1);
// -1 marks "unassigned" (stored into an unsigned id by set_id)
17  set_id(-1);
18  this->p.clear();
// -1 sentinels: fitness/fairness not computed yet
19  fitness = -1;
20  fitness_v = -1;
21  fairness = -1;
22  fairness_v = -1;
23  dcounter=-1;
24  crowd_dist = -1;
25 }
26 
27 void Individual::initialize(const Parameters& params, bool random, int id)
28 {
29 
30  // pick a dimensionality for this individual
31  int dim = r.rnd_int(1,params.max_dim);
32  // pick depth from [params.min_depth, params.max_depth]
33  /* unsigned init_max = std::min(params.max_depth, unsigned int(3)); */
34  int depth;
35  if (random)
36  depth = r.rnd_int(1, params.max_depth);
37  else
38  /* depth = r.rnd_int(1, std::min(params.max_depth,unsigned(3))); */
39  depth = r.rnd_int(1, params.max_depth);
40  // make a program for each individual
41  int n_tries = 0;
42  while (n_tries < 10)
43  {
44  /* ostringstream msg; */
45  /* msg << "make program, try " << n_tries << ", id = " << id << endl; */
46  /* logger.log(msg.str(), 3); */
47  try {
48  char ot = r.random_choice(params.otypes);
49  this->program.make_program(params.functions,
50  params.terminals,
51  depth,
52  params.term_weights,
53  params.op_weights,
54  dim,
55  ot,
56  params.longitudinalMap,
57  params.ttypes);
58  break;
59  }
60  catch (...) {
61  WARN("Failed to build tree. trying again ...");
62  n_tries++;
63  if (n_tries == 10)
64  THROW_RUNTIME_ERROR("Could not resolve tree building. Try changing otype");
65  this->program.clear();
66  }
67  }
68  this->set_id(id);
69 }
70 
// clone() body (signature line lost in extraction; the index declares it
// as `Individual clone()`): returns a copy carrying the program, the
// subprogram variation probabilities, and the id.
// NOTE(review): unlike clone(Individual&, bool), the id is always copied
// here — the sameid guard is commented out below; confirm intended.
72 {
73  Individual cpy;
74  cpy.program = program;
75  cpy.p = p;
76  cpy.id = id;
77  /* if (sameid) */
78  /* cpy.id = id; */
79  return cpy;
80 }
82 void Individual::clone(Individual& cpy, bool sameid) const
83 {
84  cpy.program = program;
85  cpy.p = p;
86  if (sameid)
87  cpy.id = id;
88 }
90 void Individual::set_rank(unsigned r){rank=r;}
92 
93 int Individual::size() const { return program.size(); }
94 
97 {
98  int n_params =0;
99  for (unsigned int i =0; i< program.size(); ++i)
100  {
101  if (program.at(i)->isNodeDx())
102  {
103  n_params += program.at(i)->arity.at('f');
104  }
105  }
106  return n_params;
107 }
108 
109 unsigned int Individual::get_complexity() const {return this->complexity;};
110 
111 
112 void Individual::set_id(unsigned i) { id = i; }
113 
114 void Individual::set_parents(const vector<Individual>& parents)
115 {
116  parent_id.clear();
117  for (const auto& p : parents)
118  parent_id.push_back(p.id);
119 }
120 
122 vector<float> Individual::get_p() const { return p; }
123 
124 void Individual::set_p(const vector<float>& weights, const float& fb,
125  bool softmax_norm)
126 {
127  //cout<<"Weights size = "<<weights.size()<<"\n";
128  //cout<<"Roots size = "<<roots().size()<<"\n";
129  if(weights.size() != program.roots().size())
130  {
131  cout<<"Weights are\n";
132  for(float weight : weights)
133  cout<<weight<<"\n";
134 
135  cout<<"Roots are\n";
136  auto root1 = program.roots();
137  for(auto root : root1)
138  cout<<root<<"\n";
139 
140  cout<<"Program is \n";
141  for (const auto& p : program) std::cout << p->name << " ";
142  cout<<"\n";
143 
144  }
145  assert(weights.size() == program.roots().size());
146  p.resize(0);
147 
148  // normalize the sum of the weights
149  float sum = 0;
150  for (unsigned i =0; i<weights.size(); ++i)
151  sum += fabs(weights.at(i));
152  if (sum == 0)
153  sum = 1;
154 
155  p.resize(weights.size());
156  for (unsigned i=0; i< weights.size(); ++i)
157  p.at(i) = 1 - fabs(weights.at(i)/sum);
158  /* for (unsigned i=0; i<p.size(); ++i) */
159  /* p.at(i) = 1-p.at(i); */
160  float u = 1.0/float(p.size()); // uniform probability
161  /* std::cout << "p: "; */
162  /* for (auto tmp : p) cout << tmp << " " ; cout << "\n"; */
163  /* std::cout << "softmax(p)\n"; */
164  if (softmax_norm)
165  p = softmax(p);
166  // do partial uniform, partial weighted probability, using feedback
167  // ratio
168  for (unsigned i=0; i<p.size(); ++i)
169  p.at(i) = (1-fb)*u + fb*p.at(i);
170  /* cout << "exiting set_p\n"; */
171  // set weights
172  this->w = weights;
173 }
174 
175 float Individual::get_p(const size_t i, bool normalize) const
176 {
184  vector<size_t> rts = program.roots();
185  size_t j = 0;
186  float size = rts.at(0)+1;
187  /* cout << "roots: "; */
188  /* for (auto root : rts) cout << root << ", "; */
189  /* cout << "\n"; */
190  /* cout << "size: " << size << "\n"; */
191 
192  while ( j < rts.size())
193  {
194  if (j > 1)
195  size = rts.at(j) - rts.at(j-1);
196 
197  if (i <= rts.at(j))
198  {
199  float tmp = normalize ? p.at(j)/size : p.at(j) ;
200  /* cout << "returning " << tmp << endl; */
201  return normalize ? p.at(j)/size : p.at(j) ;
202  }
203  else
204  ++j;
205  }
206  if (i >= rts.size() || j == rts.size())
207  {
208  cout << "WARN: bad root index attempt in get_p()\n";
209  return 0.0;
210  }
211  // normalize weight by size of subtree
212  float tmp = normalize ? p.at(j)/size : p.at(j) ;
213  /* cout << "returning " << tmp << endl; */
214  return tmp;
215 }
216 
217 vector<float> Individual::get_p(const vector<size_t>& locs,
218  bool normalize) const
219 {
225  vector<float> ps;
226  for (const auto& el : locs)
227  {
228  /* cout << "getting p for " << el << "\n"; */
229  ps.push_back(get_p(el,normalize));
230  }
231  return ps;
232 }
233 
234 shared_ptr<CLabels> Individual::fit(const Data& d,
235  const Parameters& params, bool& pass)
236 {
237  // calculate program output matrix Phi
238  logger.log("Generating output for " + get_eqn(), 3);
239  Phi = out(d, false);
240  // calculate ML model from Phi
241  logger.log("ML training on " + get_eqn(), 3);
242  this->ml = std::make_shared<ML>(params.ml, params.normalize,
243  params.classification, params.n_classes);
244 
245  shared_ptr<CLabels> yh = this->ml->fit(Phi,d.y,params,pass,dtypes);
246 
247  if (pass)
248  {
249  logger.log("Setting individual's weights...", 3);
250  set_p(this->ml->get_weights(),params.feedback,
251  params.softmax_norm);
252  }
253  else
254  { // set weights to zero
255  vector<float> w(Phi.rows(), 0);
256  set_p(w,params.feedback,params.softmax_norm);
257  }
258 
259  this->yhat = ml->labels_to_vector(yh);
260 
261  return yh;
262 }
263 
264 shared_ptr<CLabels> Individual::fit(const Data& d,
265  const Parameters& params)
266 {
267  bool pass = true;
268  return this->fit(d, params, pass);
269 }
270 
271 shared_ptr<CLabels> Individual::predict(const Data& d)
272 {
273  // calculate program output matrix Phi
274  logger.log("Generating output for " + get_eqn(), 3);
275  // toggle validation
276  MatrixXf Phi_pred = out(d, true);
277  // TODO: guarantee this is not changing nodes
278 
279  if (Phi_pred.size()==0)
280  {
281  if (d.X.cols() == 0)
282  THROW_LENGTH_ERROR("The prediction dataset has no data");
283  else
284  THROW_LENGTH_ERROR("Phi_pred is empty");
285  }
286  // calculate ML model from Phi
287  logger.log("ML predicting on " + get_eqn(), 3);
288  // assumes ML is already trained
289  shared_ptr<CLabels> yhat = ml->predict(Phi_pred);
290  return yhat;
291 }
292 
293 ArrayXXf Individual::predict_proba(const Data& d)
294 {
295  // calculate program output matrix Phi
296  logger.log("Generating output for " + get_eqn(), 3);
297  // toggle validation
298  MatrixXf Phi_pred = out(d, true);
299  // TODO: guarantee this is not changing nodes
300 
301  if (Phi_pred.size()==0)
302  THROW_RUNTIME_ERROR("Phi_pred must be generated before "
303  "predict() is called\n");
304  // calculate ML model from Phi
305  logger.log("ML predicting on " + get_eqn(), 3);
306  // assumes ML is already trained
307  ArrayXXf yhat = ml->predict_proba(Phi_pred);
308  return yhat;
309 }
310 
312 {
313  return ml->labels_to_vector(this->predict(d));
314 }
315 
317 {
318  // we want to preserve the order of the outputs in the program
319  // in the order of the outputs in Phi.
320  // get root output types
321  this->dtypes.clear();
322  for (auto r : program.roots())
323  {
324  this->dtypes.push_back(program.at(r)->otype);
325  }
326  // convert state_f to Phi
327  logger.log("converting State to Phi",3);
328  int cols;
329 
330  if (state.f.size()==0)
331  {
332  if (state.c.size() == 0)
333  {
334  if (state.b.size() == 0)
335  THROW_RUNTIME_ERROR("Error: no outputs in State");
336 
337  cols = state.b.top().size();
338  }
339  else{
340  cols = state.c.top().size();
341  }
342  }
343  else{
344  cols = state.f.top().size();
345  }
346 
347  // define Phi matrix
348  Matrix<float,Dynamic,Dynamic,RowMajor> Phi (
349  state.f.size() + state.c.size() + state.b.size(),
350  cols);
351  ArrayXf Row;
352  std::map<char,int> rows;
353  rows['f']=0;
354  rows['c']=0;
355  rows['b']=0;
356 
357  // add rows (features) to Phi with appropriate type casting
358  for (int i = 0; i < this->dtypes.size(); ++i)
359  {
360  char rt = this->dtypes.at(i);
361 
362  switch (rt)
363  {
364  case 'f':
365  // add state_f to Phi
366  Row = ArrayXf::Map(state.f.at(rows.at(rt)).data(),cols);
367  break;
368  case 'c':
369  // convert state_c to Phi
370  Row = ArrayXi::Map(
371  state.c.at(rows.at(rt)).data(),cols).cast<float>();
372  break;
373  case 'b':
374  // add state_b to Phi
375  Row = ArrayXb::Map(
376  state.b.at(rows.at(rt)).data(),cols).cast<float>();
377  break;
378  default:
379  THROW_RUNTIME_ERROR("Unknown root type");
380  }
381  // remove nans, set infs to max and min
382  clean(Row);
383  Phi.row(i) = Row;
384  ++rows.at(rt);
385  }
386  return Phi;
387 }
388 
389 #ifndef USE_CUDA
390 // calculate program output matrix
391 MatrixXf Individual::out(const Data& d, bool predict)
392 {
401  State state;
402 
403  logger.log("evaluating program " + get_eqn(),3);
404  logger.log("program length: " + std::to_string(program.size()),3);
405  // evaluate each node in program
406  for (const auto& n : program)
407  {
408  // learning nodes are set for fit or predict mode
409  if (n->isNodeTrain())
410  dynamic_cast<NodeTrain*>(n.get())->train = !predict;
411  if(state.check(n->arity))
412  n->evaluate(d, state);
413  else
414  THROW_RUNTIME_ERROR("out() error: node " + n->name + " in "
415  + program_str() + " failed arity check\n");
416 
417  }
418 
419  return state_to_phi(state);
420 }
421 #else
422 MatrixXf Individual::out(const Data& d, bool predict)
423 {
424 
433  State state;
434  logger.log("evaluating program " + get_eqn(),3);
435  logger.log("program length: " + std::to_string(program.size()),3);
436  // to minimize copying overhead, set the state size to the maximum
437  // it will reach for the program
438  std::map<char, size_t> state_size = get_max_state_size();
439  // set the device based on the thread number
440  Op::choose_gpu();
441 
442  // allocate memory for the state on the device
443  /* std::cout << "X size: " << X.rows() << "x" << X.cols() << "\n"; */
444  state.allocate(state_size,d.X.cols());
445  /* state.f.resize( */
446  // evaluate each node in program
447  for (const auto& n : program)
448  {
449  if (n->isNodeTrain()) // learning nodes are set for fit or predict mode
450  dynamic_cast<NodeTrain*>(n.get())->train = !predict;
451  if(state.check(n->arity))
452  {
453  n->evaluate(d, state);
454  // adjust indices
455  state.update_idx(n->otype, n->arity);
456  }
457  else
458  {
459  std::cout << "individual::out() error: node " << n->name << " in " + program_str() +
460  " is invalid\n";
461  std::cout << "float state size: " << state.f.size() << "\n";
462  std::cout << "bool state size: " << state.b.size() << "\n";
463  std::cout << "op arity: " << n->arity.at('f') << "f, " << n->arity.at('b') << "b\n";
464  exit(1);
465  }
466  }
467  // copy data from GPU to state (calls trim also)
468  state.copy_to_host();
469  // remove extraneous rows from states
470  //state.trim();
471  //check state
472  /* std::cout << "state.f:" << state.f.rows() << "x" << state.f.cols() << "\n"; */
473  /* for (unsigned i = 0; i < state.f.rows() ; ++i){ */
474  /* for (unsigned j = 0; j<10 ; ++j) */
475  /* std::cout << state.f(i,j) << ","; */
476  /* std::cout << "\n\n"; */
477  /* } */
478  /* std::cout << "state.b:" << state.b.rows() << "x" << state.b.cols() << "\n"; */
479  /* for (unsigned i = 0; i < state.b.rows() ; ++i){ */
480  /* for (unsigned j = 0; j<10 ; ++j) */
481  /* std::cout << state.b(i,j) << ","; */
482  /* std::cout << "\n\n"; */
483  /* } */
484  // convert state to Phi
485  logger.log("converting State to Phi",3);
486  int cols;
487 
488  if (state.f.size()==0)
489  {
490  if (state.c.size() == 0)
491  {
492  if (state.b.size() == 0)
493  THROW_RUNTIME_ERROR("Error: no outputs in state");
494 
495  cols = state.b.cols();
496  }
497  else
498  cols = state.c.cols();
499  }
500  else
501  cols = state.f.cols();
502 
503  int rows_f = state.f.rows();
504  int rows_c = state.c.rows();
505  int rows_b = state.b.rows();
506 
507  dtypes.clear();
508  Matrix<float,Dynamic,Dynamic,RowMajor> Phi (rows_f+rows_b+rows_c, cols);
509 
510  // combine states into Phi
511  Phi << state.f.cast<float>(),
512  state.c.cast<float>(),
513  state.b.cast<float>();
514 
515 
516  /* std::cout << "Phi:" << Phi.rows() << "x" << Phi.cols() << "\n"; */
517 
518  for (unsigned int i=0; i<rows_f; ++i)
519  {
520  /* Phi.row(i) = VectorXf::Map(state.f.at(i).data(),cols); */
521  dtypes.push_back('f');
522  }
523 
524  for (unsigned int i=0; i<rows_c; ++i)
525  {
526  /* Phi.row(i) = VectorXf::Map(state.f.at(i).data(),cols); */
527  dtypes.push_back('c');
528  }
529 
530  // convert state_b to Phi
531  for (unsigned int i=0; i<rows_b; ++i)
532  {
533  /* Phi.row(i+rows_f) = ArrayXb::Map(state.b.at(i).data(),cols).cast<float>(); */
534  dtypes.push_back('b');
535  }
536 
537  return Phi;
538 }
539 #endif
540 
541 #ifndef USE_CUDA
542 // calculate program output matrix
543 
544 MatrixXf Individual::out_trace(const Data& d, vector<Trace>& state_trace)
545 {
554  State state;
555  logger.log("evaluating program " + program_str(),3);
556 
557  vector<size_t> roots = program.roots();
558  size_t root = 0;
559  bool trace=false;
560  size_t trace_idx=-1;
561 
562  // if first root is a Dx node, start off storing its subprogram
563  if (program.at(roots.at(root))->isNodeDx())
564  {
565  trace=true;
566  ++trace_idx;
567  state_trace.push_back(Trace());
568  }
569 
570  // evaluate each node in program
571  for (unsigned i = 0; i<program.size(); ++i)
572  {
573  /* cout << "i = " << i << ", root = "
574  * << roots.at(root) << "\n"; */
575  if (i > roots.at(root))
576  {
577  trace=false;
578  if (root + 1 < roots.size())
579  {
580  ++root; // move to next root
581  // if new root is a Dx node, start storing its args
582  if (program.at(roots.at(root))->isNodeDx())
583  {
584  trace=true;
585  ++trace_idx;
586  state_trace.push_back(Trace());
587  }
588  }
589  }
590  if(state.check(program.at(i)->arity))
591  {
592  if (trace)
593  state_trace.at(trace_idx).copy_to_trace(state,
594  program.at(i)->arity);
595 
596  program.at(i)->evaluate(d, state);
597  program.at(i)->visits = 0;
598  }
599  else
600  THROW_RUNTIME_ERROR("out() error: node "
601  + program.at(i)->name + " in " + program_str()
602  + " is invalid\n");
603  }
604 
605  return state_to_phi(state);
606 
607 }
608 
609 #else
610 // calculate program output matrix
611 MatrixXf Individual::out_trace(const Data& d, vector<Trace>& state_trace)
612 {
621  State state;
622  /* logger.log("evaluating program " + get_eqn(),3); */
623 
624  std::map<char, size_t> state_size = get_max_state_size();
625  // set the device based on the thread number
626  choose_gpu();
627  // allocate memory for the state on the device
628  /* std::cout << "X size: " << X.rows() << "x"
629  * << X.cols() << "\n"; */
630  state.allocate(state_size,d.X.cols());
631 
632  vector<size_t> roots = program.roots();
633  size_t root = 0;
634  bool trace=false;
635  size_t trace_idx=0;
636 
637  if (program.at(roots.at(root))->isNodeDx())
638  {
639  trace=true;
640  state_trace.push_back(Trace());
641  }
642 
643  // evaluate each node in program
644  for (unsigned i = 0; i<program.size(); ++i)
645  {
646  if (i > roots.at(root)){
647  ++root;
648  if (program.at(roots.at(root))->isNodeDx())
649  {
650  trace=true;
651  state_trace.push_back(Trace());
652  ++trace_idx;
653  }
654  else
655  trace=false;
656  }
657  if(state.check(program.at(i)->arity))
658  {
659  if (trace)
660  state_trace.at(trace_idx).copy_to_trace(state,
661  program.at(i)->arity);
662 
663  program.at(i)->evaluate(d, state);
664  state.update_idx(program.at(i)->otype,
665  program.at(i)->arity);
666  //cout << "\nstack.idx[otype]: "
667  //<< state.idx[program.at(i)->otype];
668  program.at(i)->visits = 0;
669  //cout << "Evaluated node " << program.at(i)->name << endl;
670 
671  }
672  else
673  THROW_RUNTIME_ERROR("out_trace() error: node "
674  + program.at(i)->name + " in " + program_str()
675  + " is invalid\n");
676  }
677 
678  state.copy_to_host();
679 
680  // convert state_f to Phi
681  logger.log("converting State to Phi",3);
682  int cols;
683 
684  if (state.f.size()==0)
685  {
686  if (state.c.size() == 0)
687  {
688  if (state.b.size() == 0)
689  THROW_RUNTIME_ERROR("Error: no outputs in State");
690 
691  cols = state.b.cols();
692  }
693  else
694  cols = state.c.cols();
695  }
696  else
697  cols = state.f.cols();
698 
699  int rows_f = state.f.rows();
700  int rows_c = state.c.rows();
701  int rows_b = state.b.rows();
702 
703  dtypes.clear();
704 
705  Matrix<float,Dynamic,Dynamic,RowMajor> Phi (rows_f+rows_c+rows_b,
706  cols);
707 
708  ArrayXXf PhiF = ArrayXXf::Map(state.f.data(),state.f.rows(),
709  state.f.cols());
710  ArrayXXi PhiC = ArrayXXi::Map(state.c.data(),state.c.rows(),
711  state.c.cols());
712  ArrayXXb PhiB = ArrayXXb::Map(state.b.data(),state.b.rows(),
713  state.b.cols());
714 
715  // combine State into Phi
716  Phi << PhiF.cast<float>(),
717  PhiC.cast<float>(),
718  PhiB.cast<float>();
719 
720  /* std::cout << "Phi:" << Phi.rows() << "x"
721  * << Phi.cols() << "\n"; */
722 
723  for (unsigned int i=0; i<rows_f; ++i)
724  {
725  /* Phi.row(i) = VectorXf::Map(state.f.at(i).data(),cols); */
726  dtypes.push_back('f');
727  }
728 
729  for (unsigned int i=0; i<rows_c; ++i)
730  {
731  /* Phi.row(i) = VectorXf::Map(state.f.at(i).data(),cols); */
732  dtypes.push_back('c');
733  }
734 
735  // convert state_b to Phi
736  for (unsigned int i=0; i<rows_b; ++i)
737  {
738  /* Phi.row(i+rows_f) = ArrayXb::Map(state.b.at(i).data(),
739  * cols).cast<float>(); */
740  dtypes.push_back('b');
741  }
742 
743  return Phi;
744 }
745 #endif
746 
747 // return symbolic representation of program
749 {
750  string eqn="";
751  State state;
752 
753  int i = 0;
754  for (const auto& n : program)
755  {
756  if(state.check_s(n->arity))
757  {
758  n->eval_eqn(state);
759  }
760  else
761  {
762  cout << n->name << " failed arity check" << endl;
763  cout << "state fs:\n";
764  for (auto s : state.fs)
765  cout << s << endl;
766  cout << "state bs:\n";
767  for (auto s : state.bs)
768  cout << s << endl;
769  cout << "state cs:\n";
770  for (auto s : state.cs)
771  cout << s << endl;
772  THROW_RUNTIME_ERROR("get_eqn() error: node "
773  + n->name + " at location " + to_string(i)
774  + " in [ " + program_str()
775  + " ] is invalid\n");
776  }
777  ++i;
778  }
779  // tie state outputs together to return representation
780  // order by root types
781  vector<char> root_types;
782  for (auto r : program.roots())
783  {
784  root_types.push_back(program.at(r)->otype);
785  }
786  std::map<char,int> rows;
787  rows['f']=0;
788  rows['c']=0;
789  rows['b']=0;
790 
791  for (int i = 0; i < root_types.size(); ++i)
792  {
793  char rt = root_types.at(i);
794  switch (rt)
795  {
796  case 'f':
797  eqn += "[" + state.fs.at(rows[rt]) + "]";
798  break;
799  case 'c':
800  eqn += "[" + state.cs.at(rows[rt]) + "]";
801  break;
802  case 'b':
803  eqn += "[" + state.bs.at(rows[rt]) + "]";
804  break;
805  default:
806  THROW_RUNTIME_ERROR("Unknown root type");
807  }
808  ++rows.at(rt);
809  }
810 
811  this->eqn = eqn;
812  return eqn;
813 }
814 
815 
816 // return vectorized symbolic representation of program
817 vector<string> Individual::get_features()
818 {
819  vector<string> features;
820  State state;
821 
822  for (const auto& n : program){
823  if(state.check_s(n->arity))
824  n->eval_eqn(state);
825  else
826  THROW_RUNTIME_ERROR("get_eqn() error: node " + n->name
827  + " in " + program_str() + " is invalid\n");
828  }
829  // tie state outputs together to return representation
830  // order by root types
831  vector<char> root_types;
832  for (auto r : program.roots())
833  {
834  root_types.push_back(program.at(r)->otype);
835  }
836  std::map<char,int> rows;
837  rows['f']=0;
838  rows['c']=0;
839  rows['b']=0;
840 
841  for (int i = 0; i < root_types.size(); ++i)
842  {
843  char rt = root_types.at(i);
844  switch (rt)
845  {
846  case 'f':
847  features.push_back(state.fs.at(rows[rt]));
848  break;
849  case 'c':
850  features.push_back(state.cs.at(rows[rt]));
851  break;
852  case 'b':
853  features.push_back(state.bs.at(rows[rt]));
854  break;
855  default:
856  THROW_RUNTIME_ERROR("Unknown root type");
857  }
858  ++rows.at(rt);
859  }
860 
861  /* // tie state outputs together to return representation */
862  /* for (auto s : state.fs) */
863  /* features.push_back(s); */
864  /* for (auto s : state.bs) */
865  /* features.push_back(s); */
866  /* for (auto s : state.cs) */
867  /* features.push_back(s); */
868 
869  return features;
870 }
871 
872 // get program dimensionality
873 unsigned int Individual::get_dim()
874 {
883  // only calculate if dim hasn't been assigned
884  if (dim == 0)
885  {
886  unsigned int ca=0; // current arity
887 
888  for (unsigned int i = program.size(); i>0; --i)
889  {
890  ca += program.at(i-1)->total_arity();
891  if (ca == 0) ++dim;
892  else --ca;
893  }
894  }
895  return dim;
896 }
897 
899 {
900  /* Check whether this individual dominates b.
901  *
902  * Input:
903  *
904  * b: another individual
905  *
906  * Output:
907  *
908  * 1: this individual dominates b; -1: b dominates this;
909  * 0: neither dominates
910  */
911 
912  int flag1 = 0, // to check if this has a smaller objective
913  flag2 = 0; // to check if b has a smaller objective
914 
915  for (int i=0; i<obj.size(); ++i) {
916  if (obj.at(i) < b.obj.at(i))
917  flag1 = 1;
918  else if (obj.at(i) > b.obj.at(i))
919  flag2 = 1;
920  }
921 
922  if (flag1==1 && flag2==0)
923  // there is at least one smaller objective for this and none
924  // for b
925  return 1;
926  else if (flag1==0 && flag2==1)
927  // there is at least one smaller objective for b and none
928  // for this
929  return -1;
930  else
931  // no smaller objective or both have one smaller
932  return 0;
933 
934 }
935 
936 void Individual::set_obj(const vector<string>& objectives)
937 {
941  obj.clear();
942 
943  for (const auto& n : objectives)
944  {
945  if (n.compare("fitness")==0)
946  obj.push_back(fitness);
947  else if (n.compare("complexity")==0)
948  obj.push_back(set_complexity());
949  else if (n.compare("size")==0)
950  obj.push_back(program.size());
951  // condition number of Phi
952  else if (n.compare("CN")==0)
953  {
954  obj.push_back(condition_number(Phi.transpose()));
955  }
956  // covariance structure of Phi
957  else if (n.compare("corr")==0)
958  obj.push_back(mean_square_corrcoef(Phi));
959  else if (n.compare("fairness")==0)
960  obj.push_back(fairness);
961  else
962  THROW_INVALID_ARGUMENT(n+" is not a known objective");
963  }
964 
965 }
966 
968 {
969  complexity = 0;
970  std::map<char, vector<unsigned int>> state_c;
971 
972  for (const auto& n : program)
973  n->eval_complexity(state_c);
974 
975  for (const auto& s : state_c)
976  for (const auto& t : s.second)
977  complexity += t;
978 
979  return complexity;
980 }
981 
983 {
984  /* @return a string of node names. */
985  string s = "[";
986  for (const auto& p : program)
987  {
988  s+= p->name;
989  s+=" ";
990  }
991  s+="]";
992  return s;
993 }
994 
995 std::map<char, size_t> Individual::get_max_state_size()
996 {
997  // max stack size is calculated using node arities
998  std::map<char, size_t> stack_size;
999  std::map<char, size_t> max_stack_size;
1000  stack_size['f'] = 0;
1001  stack_size['c'] = 0;
1002  stack_size['b'] = 0;
1003  max_stack_size['f'] = 0;
1004  max_stack_size['c'] = 0;
1005  max_stack_size['b'] = 0;
1006 
1007  for (const auto& n : program)
1008  {
1009  ++stack_size.at(n->otype);
1010 
1011  if ( max_stack_size.at(n->otype) < stack_size.at(n->otype))
1012  max_stack_size.at(n->otype) = stack_size.at(n->otype);
1013 
1014  for (const auto& a : n->arity)
1015  stack_size.at(a.first) -= a.second;
1016  }
1017  return max_stack_size;
1018 }
1019 shared_ptr<CLabels> Individual::fit_tune(const Data& d,
1020  const Parameters& params, bool set_default)
1021 {
1022  // calculate program output matrix Phi
1023  logger.log("Generating output for " + get_eqn(), 3);
1024  Phi = out(d, false);
1025  // calculate ML model from Phi
1026  logger.log("ML training on " + get_eqn(), 3);
1027  this->ml = std::make_shared<ML>(params.ml, params.normalize,
1028  params.classification, params.n_classes);
1029  bool pass = true;
1030  shared_ptr<CLabels> yh = this->ml->fit_tune(Phi, d.y,
1031  params, pass, dtypes, set_default);
1032 
1033  if (pass)
1034  {
1035  logger.log("Setting individual's weights...", 3);
1036  set_p(this->ml->get_weights(),params.feedback,
1037  params.softmax_norm);
1038  }
1039  else
1040  { // set weights to zero
1041  vector<float> w(Phi.rows(), 0);
1042  set_p(w,params.feedback,params.softmax_norm);
1043  }
1044 
1045  this->yhat = ml->labels_to_vector(yh);
1046 
1047  return yh;
1048 }
1049 
1050 void Individual::save(string filename)
1051 {
1052  std::ofstream out;
1053  if (!filename.empty())
1054  out.open(filename);
1055  json j;
1056  to_json(j, *this);
1057  out << j ;
1058  out.close();
1059 }
1061 void Individual::load(string filename)
1062 {
1063  std::ifstream indata;
1064  indata.open(filename);
1065  if (!indata.good())
1066  THROW_INVALID_ARGUMENT("Invalid input file " + filename + "\n");
1067 
1068  std::string line;
1069  indata >> line;
1070 
1071  json j = json::parse(line);
1072  from_json(j, *this);
1073  indata.close();
1074 }
1075 
1076 } // Pop
1077 } // FT
data holding X, y, and Z data
Definition: data.h:42
VectorXf & y
Definition: data.h:46
MatrixXf & X
Definition: data.h:45
type & top()
returns top element of stack
Definition: state.h:69
unsigned int size()
returns the number of elements in the stack
Definition: state.h:66
type & at(int i)
Definition: state.h:72
individual programs in the population
Definition: individual.h:31
void set_parents(const vector< Individual > &parents)
set parent ids using parents
Definition: individual.cc:114
Individual clone()
Definition: individual.cc:71
vector< string > get_features()
return vectorized representation of program
Definition: individual.cc:817
vector< float > w
weights from ML training on program output
Definition: individual.h:42
float fairness
aggregate fairness score
Definition: individual.h:40
int size() const
return size of program
Definition: individual.cc:93
MatrixXf out(const Data &d, bool predict=false)
calculate program output matrix Phi
Definition: individual.cc:391
unsigned int dim
dimensionality of individual
Definition: individual.h:44
vector< float > p
probability of variation of subprograms
Definition: individual.h:43
VectorXf yhat
current output
Definition: individual.h:35
string get_eqn()
return symbolic representation of program
Definition: individual.cc:748
vector< char > dtypes
the data types of each column of the
Definition: individual.h:51
ArrayXXf predict_proba(const Data &d)
Definition: individual.cc:293
void initialize(const Parameters &params, bool random, int id=0)
initializes the individual by building a random program
Definition: individual.cc:27
unsigned id
tracking id
Definition: individual.h:53
int check_dominance(const Individual &b) const
check whether this dominates b.
Definition: individual.cc:898
MatrixXf Phi
transformation output of program
Definition: individual.h:34
int get_n_params()
get number of params in program
Definition: individual.cc:96
float fitness
aggregate fitness score
Definition: individual.h:38
MatrixXf state_to_phi(State &state)
converts program states to output matrices
Definition: individual.cc:316
NodeVector program
executable data structure
Definition: individual.h:33
vector< float > obj
objectives for use with Pareto selection
Definition: individual.h:45
shared_ptr< ML > ml
ML model, trained on Phi.
Definition: individual.h:37
void set_obj(const vector< string > &)
set obj vector given a string of objective names
Definition: individual.cc:936
void save(string filename)
save individual as a json object.
Definition: individual.cc:1050
string program_str() const
return program name list
Definition: individual.cc:982
shared_ptr< CLabels > predict(const Data &d)
Definition: individual.cc:271
Array< bool, Dynamic, Dynamic, RowMajor > ArrayXXb
Definition: individual.h:173
vector< float > get_p() const
get probabilities of variation
Definition: individual.cc:122
unsigned int dcounter
number of individuals this dominates
Definition: individual.h:46
shared_ptr< CLabels > fit(const Data &d, const Parameters &params, bool &pass)
fits an ML model to the data after transformation
Definition: individual.cc:234
vector< int > parent_id
ids of parents
Definition: individual.h:54
float crowd_dist
crowding distance on the Pareto front
Definition: individual.h:49
unsigned int rank
pareto front rank
Definition: individual.h:48
float fairness_v
aggregate validation fairness score
Definition: individual.h:41
MatrixXf out_trace(const Data &d, vector< Trace > &stack_trace)
calculate program output while maintaining stack trace
Definition: individual.cc:544
string eqn
equation form of the program
Definition: individual.h:55
void set_id(unsigned i)
Definition: individual.cc:112
unsigned int get_complexity() const
get the program complexity without updating it.
Definition: individual.cc:109
std::map< char, size_t > get_max_state_size()
get maximum stack size needed for evaluation.
Definition: individual.cc:995
void set_p(const vector< float > &weights, const float &fb, const bool softmax_norm=false)
set probabilities
Definition: individual.cc:124
unsigned int set_complexity()
calculate program complexity and return it.
Definition: individual.cc:967
VectorXf predict_vector(const Data &d)
Definition: individual.cc:311
float fitness_v
aggregate validation fitness score
Definition: individual.h:39
void load(string filename)
load individual from a file.
Definition: individual.cc:1061
unsigned int get_dim()
get the dimensionality (number of outputs) of the program
Definition: individual.cc:873
shared_ptr< CLabels > fit_tune(const Data &d, const Parameters &params, bool set_default=false)
fits and tunes an ML model to the data after transformation
Definition: individual.cc:1019
void set_rank(unsigned r)
set the Pareto front rank of the individual
Definition: individual.cc:90
unsigned int complexity
the complexity of the program.
Definition: individual.h:50
string log(string m, int v, string sep="\n") const
print message with verbosity control.
Definition: logger.cc:54
int rnd_int(int lowerLimit, int upperLimit)
Definition: rnd.cc:77
T random_choice(const vector< T > &v)
Definition: rnd.h:73
#define THROW_LENGTH_ERROR(err)
Definition: error.h:32
#define THROW_RUNTIME_ERROR(err)
Definition: error.h:30
#define WARN(err)
Definition: error.h:33
#define THROW_INVALID_ARGUMENT(err)
Definition: error.h:31
void choose_gpu()
void from_json(const json &j, NodeVector &nv)
Definition: nodevector.cc:442
void to_json(json &j, const NodeVector &nv)
Definition: nodevector.cc:392
float condition_number(const MatrixXf &X)
returns the condition number of a matrix.
Definition: utils.cc:236
vector< T > softmax(const vector< T > &w)
return the softmax transformation of a vector.
Definition: utils.h:130
float mean_square_corrcoef(const MatrixXf &X)
Definition: utils.cc:266
static Logger & logger
Definition: logger.h:46
static Rnd & r
Definition: rnd.h:135
std::string to_string(const T &value)
template function to convert objects to string for logging
Definition: utils.h:422
void clean(ArrayXf &x)
limits node output to be between MIN_FLT and MAX_FLT
Definition: utils.cc:18
main Feat namespace
Definition: data.cc:13
int i
Definition: params.cc:552
contains various types of State actually used by feat
Definition: state.h:102
Stack< ArrayXb > b
boolean node stack
Definition: state.h:104
Stack< string > fs
floating node string stack
Definition: state.h:107
bool check_s(std::map< char, unsigned int > &arity)
Definition: state.cc:34
Stack< string > bs
boolean node string stack
Definition: state.h:108
bool check(std::map< char, unsigned int > &arity)
checks if arity of node provided satisfies the node names in various string State
Definition: state.cc:19
Stack< string > cs
categorical node string stack
Definition: state.h:109
Stack< ArrayXi > c
categorical stack
Definition: state.h:105
Stack< ArrayXf > f
floating node stack
Definition: state.h:103
used for tracing stack outputs for backprop algorithm.
Definition: state.h:232
holds the hyperparameters for Feat.
Definition: params.h:25
bool softmax_norm
use softmax norm on probabilities
Definition: params.h:74
unsigned int max_dim
maximum dimensionality of programs
Definition: params.h:49
bool classification
flag to conduct classification rather than
Definition: params.h:32
float feedback
strength of ml feedback on probabilities
Definition: params.h:56
unsigned int n_classes
number of classes for classification
Definition: params.h:57
NodeVector functions
function nodes available in programs
Definition: params.h:42
unsigned int max_depth
max depth of programs
Definition: params.h:47
string ml
machine learner used with Feat
Definition: params.h:31
vector< float > term_weights
probability weighting of terminals
Definition: params.h:40
bool normalize
whether to normalize the input data
Definition: params.h:75
vector< std::string > longitudinalMap
Definition: params.h:45
NodeVector terminals
terminal nodes available in programs vector storing longitudinal data keys
Definition: params.h:43
vector< char > ttypes
program terminal types ('f', 'b')
Definition: params.h:35
vector< char > otypes
program output types ('f', 'b')
Definition: params.h:34
vector< float > op_weights
probability weighting of functions
Definition: params.h:41
vector< size_t > roots() const
returns indices of root nodes
Definition: nodevector.cc:55
void make_program(const NodeVector &functions, const NodeVector &terminals, int max_d, const vector< float > &term_weights, const vector< float > &op_weights, int dim, char otype, vector< string > longitudinalMap, const vector< char > &term_types)
Definition: nodevector.cc:368