Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
lexicase.cpp
Go to the documentation of this file.
1#include "lexicase.h"
2
3namespace Brush {
4namespace Sel {
5
6using namespace Brush;
7using namespace Pop;
8using namespace Sel;
9
10template<ProgramType T>
12{
13 this->name = "lexicase";
14 this->survival = surv;
15}
16
17template<ProgramType T>
18vector<size_t> Lexicase<T>::select(Population<T>& pop, int island,
19 const Parameters& params)
20{
21 // this one can be executed in parallel because it is just reading the errors. This
22 // method assumes that the expressions have been fitted previously, and their respective
23 // error vectors are filled
24
26
27 // if this is first generation, just return indices to pop
28 if (params.current_gen==0)
29 return island_pool;
30
31 //< number of samples
32 unsigned int N = pop.individuals.at(island_pool.at(0))->error.size();
33
34 //< number of individuals
35 unsigned int P = island_pool.size();
36
37 // define epsilon
38 ArrayXf epsilon = ArrayXf::Zero(N);
39
40 // if output is continuous, use epsilon lexicase
41 if (!params.classification || params.scorer_.compare("log")==0
42 || params.scorer_.compare("multi_log")==0)
43 {
44 // for each sample, calculate epsilon
45 for (int i = 0; i<epsilon.size(); ++i)
46 {
47 VectorXf case_errors(island_pool.size());
48 for (int j = 0; j<island_pool.size(); ++j)
49 {
50 case_errors(j) = pop.individuals.at(island_pool[j])->error(i);
51 }
53 }
54 }
55 assert(epsilon.size() == N);
56
57 // selection pool
58 vector<size_t> starting_pool;
59 for (int i = 0; i < island_pool.size(); ++i)
60 {
61 starting_pool.push_back(island_pool[i]);
62 }
63 assert(starting_pool.size() == P);
64
65 vector<size_t> selected(P,0); // selected individuals
66
67 for (unsigned int i = 0; i<P; ++i) // selection loop
68 {
69 vector<size_t> cases; // cases (samples)
70 if (params.classification && !params.class_weights.empty())
71 {
72 // for classification problems, weight case selection
73 // by class weights
74 vector<size_t> choices(N);
75 std::iota(choices.begin(), choices.end(),0);
76
77 vector<float> sample_weights = params.sample_weights;
78
79 for (unsigned i = 0; i<N; ++i)
80 {
81 vector<size_t> choice_indices(N-i);
82 std::iota(choice_indices.begin(),choice_indices.end(),0);
83
84 size_t idx = *r.select_randomly(
85 choice_indices.begin(), choice_indices.end(),
86 sample_weights.begin(), sample_weights.end());
87
88 cases.push_back(choices.at(idx));
89 choices.erase(choices.begin() + idx);
90
91 sample_weights.erase(sample_weights.begin() + idx);
92 }
93 }
94 else
95 { // otherwise, choose cases randomly
96 cases.resize(N);
97 std::iota(cases.begin(),cases.end(),0);
98 r.shuffle(cases.begin(),cases.end()); // shuffle cases
99 }
100 vector<size_t> pool = starting_pool; // initial pool
101 vector<size_t> winner; // winners
102
103 bool pass = true; // checks pool size and number of cases
104 unsigned int h = 0; // case count
105
106 float epsilon_threshold;
107
108 while(pass){ // main loop
110
111 winner.resize(0); // winners
112 // minimum error on case
113 float minfit = std::numeric_limits<float>::max();
114
115 // get minimum
116 for (size_t j = 0; j<pool.size(); ++j)
117 if (pop.individuals.at(pool[j])->error(cases[h]) < minfit)
118 minfit = pop.individuals.at(pool[j])->error(cases[h]);
119
120 // criteria to stay in pool
122
123 // select best
124 for (size_t j = 0; j<pool.size(); ++j)
125 if (pop.individuals.at(pool[j])->error(cases[h])
127 winner.push_back(pool[j]);
128
129 ++h; // next case
130 // only keep going if needed
131 pass = (winner.size()>1 && h<cases.size());
132
133 if(winner.size() == 0)
134 {
135 if(h >= cases.size())
136 winner.push_back(*r.select_randomly(
137 pool.begin(), pool.end()) );
138 else
139 pass = true;
140 }
141 else
142 pool = winner; // reduce pool to remaining individuals
143 }
144
145 assert(winner.size()>0);
146
147 //if more than one winner, pick randomly
149 winner.begin(), winner.end() );
150
151 // cout << "parallel end index " + to_string(i) << endl;
152 }
153
154 if (selected.size() != island_pool.size())
155 {
156 // std::cout << "selected: " ;
157 // for (auto s: selected) std::cout << s << " "; std::cout << "\n";
158 HANDLE_ERROR_THROW("Lexicase did not select correct number of \
159 parents");
160 }
161
162 return selected;
163}
164
165template<ProgramType T>
167 const Parameters& params)
168{
169 /* Lexicase survival */
170 HANDLE_ERROR_THROW("Lexicase survival not implemented");
171 return vector<size_t>();
172}
173
174}
175}
void bind_engine(py::module &m, string name)
vector< size_t > get_island_indexes(int island)
Definition population.h:38
vector< std::shared_ptr< Individual< T > > > individuals
Definition population.h:18
Lexicase(bool surv=false)
Definition lexicase.cpp:11
vector< size_t > survive(Population< T > &pop, int island, const Parameters &p)
lexicase survival
Definition lexicase.cpp:166
vector< size_t > select(Population< T > &pop, int island, const Parameters &p)
function returns a set of selected indices from pop
Definition lexicase.cpp:18
void shuffle(RandomAccessIterator first, RandomAccessIterator last)
Definition rnd.h:49
Iter select_randomly(Iter start, Iter end)
Definition rnd.h:61
#define HANDLE_ERROR_THROW(err)
Definition error.h:27
float mad(const ArrayXf &x)
median absolute deviation
Definition utils.cpp:373
static Rnd & r
Definition rnd.h:174
< nsga2 selection operator for getting the front
Definition data.cpp:12
vector< float > sample_weights
weights for each sample
Definition params.h:67
bool classification
Definition params.h:71
string scorer_
actual loss function used, determined by error
Definition params.h:63
vector< float > class_weights
weights for each class
Definition params.h:66
unsigned int current_gen
Definition params.h:27