Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
population.cpp
Go to the documentation of this file.
1#include "population.h"
2
3namespace Brush{
4namespace Pop{
5
6template<ProgramType T>
8{
9 individuals.resize(0);
10 mig_prob = 0.0;
11 pop_size = 0;
12 num_islands = 0;
13 linear_complexity = false;
14}
15
16
17template<ProgramType T>
18void Population<T>::init(vector<Individual<T>>& new_individuals, const Parameters& params)
19{
20 if (new_individuals.size() != params.pop_size
21 && new_individuals.size() != 2*params.pop_size ) {
22 throw std::runtime_error("Individual vector has different number of individuals than pop_size. popsize is "+to_string(params.pop_size)+", number of individuals is " + to_string(new_individuals.size()));
23 }
24
25 this->mig_prob = params.mig_prob;
26 this->pop_size = params.pop_size;
27 this->num_islands=params.num_islands;
28 this->linear_complexity = in(params.objectives, std::string("linear_complexity"));
29
31
32 // If the assert fails, execution stops, but for completeness, you can also throw an exception
33 size_t p = pop_size;
34
35 individuals.resize(2*p);
36 std::fill(individuals.begin(), individuals.end(), nullptr);
37
38 for (int i=0; i<num_islands; ++i)
39 {
40 size_t idx_start = std::floor(i*p/num_islands);
41 size_t idx_end = std::floor((i+1)*p/num_islands);
42
43 auto delta = idx_end - idx_start;
44
45 island_indexes.at(i).resize(delta);
46 iota(island_indexes.at(i).begin(), island_indexes.at(i).end(), idx_start);
47
48 if (new_individuals.size() == 2*params.pop_size) { // pop + offspring
49 island_indexes.at(i).resize(delta*2);
50 iota(
51 island_indexes.at(i).begin() + delta, island_indexes.at(i).end(),
52 p+idx_start);
53 }
54 };
55
56 for (int j=0; j< new_individuals.size(); j++) {
57 individuals.at(j) = std::make_shared<Individual<T>>(new_individuals.at(j));
58 }
59}
60
61template<ProgramType T>
63{
64 this->mig_prob = params.mig_prob;
65 this->pop_size = params.pop_size;
66 this->num_islands=params.num_islands;
67 this->linear_complexity = in(params.objectives, std::string("linear_complexity"));
68
69 // Tuples with start and end indexes for each island. Number of individuals
70 // in each island can slightly differ if num_islands is not a divisor of p (popsize)
72
73 size_t p = pop_size; // population size
74
75 for (int i=0; i<num_islands; ++i)
76 {
77 size_t idx_start = std::floor(i*p/num_islands);
78 size_t idx_end = std::floor((i+1)*p/num_islands);
79
80 auto delta = idx_end - idx_start;
81
82 island_indexes.at(i).resize(delta);
83 iota(island_indexes.at(i).begin(), island_indexes.at(i).end(), idx_start);
84 };
85
86 // this calls the default constructor for the container template class
87 individuals.resize(2*p); // we will never increase or decrease the size during execution (because is not thread safe). this way, theres no need to sync between selecting and varying the population
88
89 for (int i = 0; i< p; ++i)
90 {
91 // first half will contain the initial population
92 individuals.at(i) = std::make_shared<Individual<T>>();
93 individuals.at(i)->init(ss, params);
94
95 // second half is space to the offspring (but we dont initialize them)
96 individuals.at(p+i) = nullptr;
97 }
98}
99
100template<ProgramType T>
101void Population<T>::save(string filename)
102{
103 std::ofstream out;
104 if (!filename.empty())
105 out.open(filename);
106 else
107 out.open("population.json");
108
109 json j;
110 to_json(j, *this);
111 out << j ;
112 out.close();
113 logger.log("Saved population to file " + filename, 1);
114}
115
116template<ProgramType T>
117void Population<T>::load(string filename)
118{
119 std::ifstream indata;
120 indata.open(filename);
121 if (!indata.good())
122 HANDLE_ERROR_THROW("Invalid input file " + filename + "\n");
123
124 std::string line;
125 indata >> line;
126
127 json j = json::parse(line);
128 from_json(j, *this);
129
130 logger.log("Loaded population from " + filename + " of size = "
131 + to_string(this->size()),1);
132
133 indata.close();
134}
135
137template<ProgramType T>
139{
140 size_t p = pop_size; // population size. prep_offspring slots will douple the population, adding the new expressions into the islands
141
142 // this is going to be tricky (pay attention to delta and p use)
143 size_t idx_start = std::floor(island*p/num_islands);
144 size_t idx_end = std::floor((island+1)*p/num_islands);
145
146 auto delta = idx_end - idx_start; // island size
147
148 // inserting indexes of the offspring
149 island_indexes.at(island).resize(island_indexes.at(island).size() + delta);
150 iota(
151 island_indexes.at(island).begin() + delta, island_indexes.at(island).end(),
152 p+idx_start);
153
154 // Im keeping the offspring and parents in the same population object, because we
155 // have operations that require them together (archive, hall of fame.)
156 // The downside is having to be aware that islands will create offsprings
157 // intercalated with other islands
158}
159
160template<ProgramType T>
161void Population<T>::update(vector<vector<size_t>> survivors)
162{
163
164 // this is the step that should end up cutting off half of the population
165 vector<Individual<T>> new_pop;
166 new_pop.resize(0);
167 for (int k=0; k<survivors.at(0).size(); ++k){
168 new_pop.push_back( *individuals.at(survivors.at(0).at(k)) );
169 }
170
171 assert(new_pop.size() == individuals.size() && " migration ended up with a different popsize");
172
173 for (int j=0; j<num_islands; ++j)
174 {
175
176 // need to make island point to original range
177 size_t idx_start = std::floor(j*pop_size/num_islands);
178 size_t idx_end = std::floor((j+1)*pop_size/num_islands);
179 auto delta = idx_end - idx_start;
180
181 // inserting indexes of the offspring
182 island_indexes.at(j).clear();
183 island_indexes.at(j).resize(delta);
184 iota(island_indexes.at(j).begin(), island_indexes.at(j).end(), idx_start);
185 }
186
187 assert(new_pop.size() == pop_size
188 && " update ended up with a different popsize");
189
190 this->individuals.resize(0);
191 for (auto ind : new_pop)
192 {
193 // making hard copies of the individuals
194 json ind_copy = ind;
195
196 // this will fill just half of the individuals vector
197 individuals.push_back(
198 std::make_shared<Individual<T>>(ind_copy) );
199 }
200
201 assert(individuals.size() == pop_size
202 && " number of new individuals is different from pop size");
203
204 for (int i=0; i< pop_size; ++i)
205 {
206 // second half is space to the offspring (but we dont initialize them)
207 individuals.push_back(nullptr);
208 }
209}
210
211template<ProgramType T>
213{
214 // TODO: rename it. This function does not print anything, just returns a string
215 string output = "";
216
217 for (int j=0; j<num_islands; ++j)
218 {
219 for (int k=0; k<island_indexes.at(j).size(); ++k) {
220 Individual<T>& ind = *individuals.at(island_indexes.at(j).at(k)).get();
221
222 output += "island " + to_string(j) + " ";
223 output += "index " + to_string(island_indexes.at(j).at(k)) + ": ";
224 output += ind.get_model() + " ";
225 output += ind.fitness.toString() + sep;
226 }
227 }
228 return output;
229}
230
231template<ProgramType T>
232vector<vector<size_t>> Population<T>::sorted_front(unsigned rank)
233{
234 // this is used to migration and update archive at the end of a generation. expect islands without offspring
235
236 /* Returns individuals on the Pareto front, sorted by increasign complexity. */
237 vector<vector<size_t>> pf_islands;
238 pf_islands.resize(num_islands);
239
240 for (int j=0;j<num_islands; ++j)
241 {
242 auto indices = island_indexes.at(j);
243 vector<size_t> pf;
244
245 for (int i=0; i<indices.size(); ++i)
246 {
247 // this assumes that rank was previously calculated. It is set in selection (ie nsga2) if the information is useful to select/survive
248 if (individuals.at(indices.at(i))->fitness.rank == rank)
249 pf.push_back(i);
250 }
251
252 if (this->linear_complexity)
253 std::sort(pf.begin(),pf.end(),SortLinearComplexity(*this));
254 else
255 std::sort(pf.begin(),pf.end(),SortComplexity(*this));
256
257 auto it = std::unique(pf.begin(),pf.end(),SameFitComplexity(*this));
258
259 pf.resize(std::distance(pf.begin(),it));
260 pf_islands.at(j) = pf;
261 }
262
263 return pf_islands;
264}
265
266template<ProgramType T>
267vector<size_t> Population<T>::hall_of_fame(unsigned rank)
268{
269 // Inspired in fast nds from nsga2
270
271 // TODO: use hall of fame instead of re-implmementing this feature in
272 // archive init and update functions
273
274 // this is used to migration and update archive at the end of a generation.
275 // Thiis function expects islands without offspring
276
277 // TODO: run fast_nds here to get the pareto fronts? I think this could improve performance
278 vector<size_t> merged_islands(0);
279
280 for (int j=0;j<num_islands; ++j)
281 {
282 auto indices = island_indexes.at(j);
283 for (int i=0; i<indices.size(); ++i)
284 {
285 merged_islands.push_back(indices.at(i));
286 }
287 }
288
289 // checking if there is no dominance between different fronts
290 // (without updating their fitness objects)
291 vector<size_t> hof;
292 hof.clear();
293
294 for (int i = 0; i < merged_islands.size(); ++i) {
295 int dcount = 0;
296
297 auto p = individuals.at(merged_islands[i]);
298
299 for (int j = 0; j < merged_islands.size(); ++j) {
300 const Individual<T>& q = (*individuals.at(merged_islands[j]));
301
302 int compare = p->fitness.dominates(q.fitness);
303 if (compare == -1) { // q dominates p
304 //p.dcounter += 1;
305 dcount += 1;
306 }
307 }
308
309 if (dcount == 0) {
310 hof.push_back(merged_islands[i]);
311 }
312 }
313
314 if (this->linear_complexity)
315 std::sort(hof.begin(),hof.end(),SortLinearComplexity(*this));
316 else
317 std::sort(hof.begin(),hof.end(),SortComplexity(*this));
318
319 auto it = std::unique(hof.begin(),hof.end(),SameFitComplexity(*this));
320
321 hof.resize(std::distance(hof.begin(),it));
322
323 return hof;
324}
325
326template<ProgramType T>
328{
329 // changes where island points to by shuffling it
330
331 if (num_islands==1)
332 return; // skipping. this only work because update is fixing island indexes
333
334 // This method is not thread safe (as it is now)
335 vector<vector<size_t>> new_island_indexes;
336 new_island_indexes.resize(num_islands);
337
338 for (int island=0; island<num_islands; ++island)
339 {
340 new_island_indexes.at(island).resize(0);
341
342 auto indices = island_indexes.at(island);
343 for (unsigned int i=0; i<indices.size(); ++i)
344 {
345 if (r() < mig_prob)
346 {
347 size_t migrating_idx;
348
349 vector<int> other_islands(num_islands-1);
350 iota(other_islands.begin(), other_islands.end(), 0);
351
352 // skipping current island
353 auto it = other_islands.begin();
354 std::advance(it, island);
355 for (;it != other_islands.end(); ++it) {
356 ++(*it);
357 }
358
359 // picking other island
360 int other_island = *r.select_randomly(
361 other_islands.begin(),
362 other_islands.end());
363
364 migrating_idx = *r.select_randomly(
365 island_indexes.at(other_island).begin(),
366 island_indexes.at(other_island).end());
367
368 new_island_indexes.at(island).push_back(migrating_idx);
369 }
370 else
371 {
372 new_island_indexes.at(island).push_back(indices.at(i));
373 }
374 }
375 }
376
377 // making hard copies (so the next generation starts with islands that does not share individuals
378 // this is particularly important to avoid multiple threads assigning different rank/crowdist/dcounter
379 // or different fitness)
380
381 vector<Individual<T>> new_pop;
382 new_pop.resize(0);
383 for (int j=0; j<num_islands; ++j)
384 {
385 for (int k=0; k<new_island_indexes.at(j).size(); ++k){
386 new_pop.push_back(
387 *individuals.at(new_island_indexes.at(j).at(k)) );
388 }
389
390 // need to make island point to original range
391 size_t idx_start = std::floor(j*pop_size/num_islands);
392 size_t idx_end = std::floor((j+1)*pop_size/num_islands);
393
394 auto delta = idx_end - idx_start;
395
396 assert(delta == new_island_indexes.at(j).size()
397 && " new pop has the wrong number of new individuals");
398
399 // inserting indexes of the offspring
400 island_indexes.at(j).clear();
401 island_indexes.at(j).resize(delta);
402 iota(island_indexes.at(j).begin(), island_indexes.at(j).end(), idx_start);
403 }
404
405 assert(new_pop.size() == pop_size
406 && " migration ended up with a different popsize");
407
408 this->individuals.resize(0);
409 for (auto ind : new_pop)
410 {
411 // making hard copies of the individuals
412 json ind_copy = ind;
413
414 // this will fill just half of the pop
415 individuals.push_back(
416 std::make_shared<Individual<T>>(ind_copy) );
417 }
418 for (int i=0; i< pop_size; ++i)
419 {
420 // second half is space to the offspring (but we dont initialize them)
421 individuals.push_back(nullptr);
422 }
423}
424
425} // Pop
426} // Brush
string get_model(string fmt="compact", bool pretty=false)
Definition individual.h:104
Fitness fitness
aggregate fitness score
Definition individual.h:37
bool linear_complexity
Indicates if the user set linear_complexity instead of recursive complexity.
Definition population.h:17
int size()
returns population size (the effective size of the individuals)
Definition population.h:37
void add_offspring_indexes(int island)
update individual vector size, distributing the expressions in num_islands
void save(string filename)
vector< vector< size_t > > sorted_front(unsigned rank=1)
return complexity-sorted Pareto front indices for each island
vector< std::shared_ptr< Individual< T > > > individuals
Definition population.h:19
void init(SearchSpace &ss, const Parameters &params)
initialize population of programs with a starting model and/or from file
string print_models(string sep="\n")
return population equations.
vector< vector< size_t > > island_indexes
Definition population.h:20
vector< size_t > hall_of_fame(unsigned rank=1)
void load(string filename)
void update(vector< vector< size_t > > survivors)
reduce programs to the indices in survivors. Not thread safe,as it removes elements
#define HANDLE_ERROR_THROW(err)
Definition error.h:27
void to_json(json &j, const Individual< T > &p)
Definition individual.h:176
void from_json(const json &j, Individual< T > &p)
Definition individual.h:191
string to_string(const T &value)
template function to convert objects to string for logging
Definition utils.h:369
static Logger & logger
Definition logger.h:60
bool in(const V &v, const T &i)
check if element is in vector.
Definition utils.h:192
static Rnd & r
Definition rnd.h:176
< nsga2 selection operator for getting the front
Definition bandit.cpp:4
std::string toString() const
Definition fitness.h:199
int dominates(const Fitness &b) const
set obj vector given a string of objective names
Definition fitness.cpp:55
vector< string > objectives
Definition params.h:40
check for same fitness and complexity to filter uniqueness.
Definition population.h:87
Sort each island in increasing complexity. This is not thread safe. I should set complexities of the ...
Definition population.h:66
Holds a search space, consisting of operations and terminals and functions, and methods to sample tha...