Feat C++ API
A feature engineering automation tool
lexicase.cc
Go to the documentation of this file.
1 /* FEAT
2 copyright 2017 William La Cava
3 license: GNU/GPL v3
4 */
5 
6 #include "lexicase.h"
7 
8 namespace FT{
9 namespace Sel{
10 
11 Lexicase::Lexicase(bool surv){ name = "lexicase"; survival = surv; }
12 
14 
15 vector<size_t> Lexicase::select(Population& pop,
16  const Parameters& params, const Data& d)
17 {
28  //< number of samples
29  unsigned int N = pop.individuals.at(0).error.size();
30  //< number of individuals
31  unsigned int P = pop.individuals.size();
32  // define epsilon
33  ArrayXf epsilon = ArrayXf::Zero(N);
34 
35  // if output is continuous, use epsilon lexicase
36  if (!params.classification || params.scorer_.compare("log")==0
37  || params.scorer_.compare("multi_log")==0)
38  {
39  // for each sample, calculate epsilon
40  for (int i = 0; i<epsilon.size(); ++i)
41  {
42  VectorXf case_errors(pop.individuals.size());
43  for (int j = 0; j<pop.individuals.size(); ++j)
44  {
45  case_errors(j) = pop.individuals.at(j).error(i);
46  }
47  epsilon(i) = mad(case_errors);
48  }
49  }
50 
51  // selection pool
52  vector<size_t> starting_pool;
53  for (int i = 0; i < pop.individuals.size(); ++i)
54  {
55  starting_pool.push_back(i);
56  }
57  assert(starting_pool.size() == P);
58 
59  vector<size_t> selected(P,0); // selected individuals
60  #pragma omp parallel for
61  for (unsigned int i = 0; i<P; ++i) // selection loop
62  {
63  vector<size_t> cases; // cases (samples)
64  if (params.classification && !params.class_weights.empty())
65  {
66  // for classification problems, weight case selection
67  // by class weights
68  vector<size_t> choices(N);
69  std::iota(choices.begin(), choices.end(),0);
70  vector<float> sample_weights = params.sample_weights;
71  for (unsigned i = 0; i<N; ++i)
72  {
73  vector<size_t> choice_idxs(N-i);
74  std::iota(choice_idxs.begin(),choice_idxs.end(),0);
75  size_t idx = r.random_choice(choice_idxs,
76  sample_weights);
77  cases.push_back(choices.at(idx));
78  choices.erase(choices.begin() + idx);
79  sample_weights.erase(sample_weights.begin() + idx);
80  }
81  }
82  else
83  { // otherwise, choose cases randomly
84  cases.resize(N);
85  std::iota(cases.begin(),cases.end(),0);
86  r.shuffle(cases.begin(),cases.end()); // shuffle cases
87  }
88  vector<size_t> pool = starting_pool; // initial pool
89  vector<size_t> winner; // winners
90 
91  bool pass = true; // checks pool size and number of cases
92  unsigned int h = 0; // case count
93 
94  while(pass){ // main loop
95 
96  winner.resize(0); // winners
97  // minimum error on case
98  float minfit = std::numeric_limits<float>::max();
99 
100  // get minimum
101  for (size_t j = 0; j<pool.size(); ++j)
102  if (pop.individuals.at(pool[j]).error(cases[h]) < minfit)
103  minfit = pop.individuals.at(pool[j]).error(cases[h]);
104 
105  // select best
106  for (size_t j = 0; j<pool.size(); ++j)
107  if (pop.individuals.at(pool[j]).error(cases[h])
108  <= minfit+epsilon[cases[h]])
109  winner.push_back(pool[j]);
110 
111  ++h; // next case
112  // only keep going if needed
113  pass = (winner.size()>1 && h<cases.size());
114 
115  if(winner.size() == 0)
116  {
117  if(h >= cases.size())
118  winner.push_back(r.random_choice(pool));
119  else
120  pass = true;
121  }
122  else
123  pool = winner; // reduce pool to remaining individuals
124 
125  }
126 
127 
128  assert(winner.size()>0);
129  //if more than one winner, pick randomly
130  selected.at(i) = r.random_choice(winner);
131  }
132 
133  if (selected.size() != pop.individuals.size())
134  {
135  std::cout << "selected: " ;
136  for (auto s: selected) std::cout << s << " "; std::cout << "\n";
137  THROW_LENGTH_ERROR("Lexicase did not select correct number of \
138  parents");
139  }
140  return selected;
141 }
142 
144  const Parameters& params, const Data& d)
145 {
146  /* Lexicase survival */
147  THROW_RUNTIME_ERROR("Lexicase survival not implemented");
148  return vector<size_t>();
149 }
150 
151 }
152 
153 }
data holding X, y, and Z data
Definition: data.h:42
T random_choice(const vector< T > &v)
Definition: rnd.h:73
void shuffle(RandomAccessIterator first, RandomAccessIterator last)
Definition: rnd.h:53
#define THROW_LENGTH_ERROR(err)
Definition: error.h:32
#define THROW_RUNTIME_ERROR(err)
Definition: error.h:30
T pop(vector< T > *v)
Definition: auto_backprop.h:49
float mad(const ArrayXf &x)
median absolute deviation
Definition: utils.cc:189
static Rnd & r
Definition: rnd.h:135
main Feat namespace
Definition: data.cc:13
int i
Definition: params.cc:552
holds the hyperparameters for Feat.
Definition: params.h:25
bool classification
flag to conduct classification rather than
Definition: params.h:32
vector< float > class_weights
weights for each class
Definition: params.h:60
vector< float > sample_weights
weights for each sample
Definition: params.h:61
string scorer_
actual loss function used, determined by scorer
Definition: params.h:63
Defines a population of programs and functions for constructing them.
Definition: population.h:28
vector< size_t > select(Population &pop, const Parameters &params, const Data &d)
function returns a set of selected indices from pop
Definition: lexicase.cc:15
Lexicase(bool surv)
Definition: lexicase.cc:11
vector< size_t > survive(Population &pop, const Parameters &params, const Data &d)
lexicase survival
Definition: lexicase.cc:143