Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
population.cpp
Go to the documentation of this file.
1#include "population.h"
2
3namespace Brush{
4namespace Pop{
5
6template<ProgramType T>
8{
9 individuals.resize(0);
10 mig_prob = 0.0;
11 pop_size = 0;
12 num_islands = 0;
13 linear_complexity = false;
14}
15
16
17template<ProgramType T>
18void Population<T>::init(vector<Individual<T>>& new_individuals, const Parameters& params)
19{
20 if (new_individuals.size() != params.pop_size
21 && new_individuals.size() != 2*params.pop_size ) {
22 throw std::runtime_error("Individual vector has different number of individuals than pop_size. popsize is "+to_string(params.pop_size)+", number of individuals is " + to_string(new_individuals.size()));
23 }
24
25 this->mig_prob = params.mig_prob;
26 this->pop_size = params.pop_size;
27 this->num_islands=params.num_islands;
28 this->linear_complexity = in(params.objectives, std::string("linear_complexity"));
29
31
32 // If the assert fails, execution stops, but for completeness, you can also throw an exception
33 size_t p = pop_size;
34
35 individuals.resize(2*p);
36 std::fill(individuals.begin(), individuals.end(), nullptr);
37
38 for (int i=0; i<num_islands; ++i)
39 {
40 size_t idx_start = std::floor(i*p/num_islands);
41 size_t idx_end = std::floor((i+1)*p/num_islands);
42
43 auto delta = idx_end - idx_start;
44
45 island_indexes.at(i).resize(delta);
46 iota(island_indexes.at(i).begin(), island_indexes.at(i).end(), idx_start);
47
48 if (new_individuals.size() == 2*params.pop_size) { // pop + offspring
49 island_indexes.at(i).resize(delta*2);
50 iota(
51 island_indexes.at(i).begin() + delta, island_indexes.at(i).end(),
52 p+idx_start);
53 }
54 };
55
56 for (int j=0; j< new_individuals.size(); j++) {
57 individuals.at(j) = std::make_shared<Individual<T>>(new_individuals.at(j));
58 }
59}
60
61template<ProgramType T>
63{
64 this->mig_prob = params.mig_prob;
65 this->pop_size = params.pop_size;
66 this->num_islands=params.num_islands;
67 this->linear_complexity = in(params.objectives, std::string("linear_complexity"));
68
69 // Tuples with start and end indexes for each island. Number of individuals
70 // in each island can slightly differ if num_islands is not a divisor of p (popsize)
72
73 size_t p = pop_size; // population size
74
75 for (int i=0; i<num_islands; ++i)
76 {
77 size_t idx_start = std::floor(i*p/num_islands);
78 size_t idx_end = std::floor((i+1)*p/num_islands);
79
80 auto delta = idx_end - idx_start;
81
82 island_indexes.at(i).resize(delta);
83 iota(island_indexes.at(i).begin(), island_indexes.at(i).end(), idx_start);
84 };
85
86 // this calls the default constructor for the container template class
87 individuals.resize(2*p); // we will never increase or decrease the size during execution (because is not thread safe). this way, theres no need to sync between selecting and varying the population
88
89 for (int i = 0; i< p; ++i)
90 {
91 // first half will contain the initial population
92 individuals.at(i) = std::make_shared<Individual<T>>();
93 individuals.at(i)->init(ss, params);
94
95 // second half is space to the offspring (but we dont initialize them)
96 individuals.at(p+i) = nullptr;
97 }
98}
99
100template<ProgramType T>
101void Population<T>::save(string filename)
102{
103 std::ofstream out;
104 if (!filename.empty())
105 out.open(filename);
106 else
107 out.open("population.json");
108
109 json j;
110 to_json(j, *this);
111 out << j ;
112 out.close();
113 logger.log("Saved population to file " + filename, 1);
114}
115
116template<ProgramType T>
117void Population<T>::load(string filename)
118{
119 std::ifstream indata;
120 indata.open(filename);
121 if (!indata.good())
122 HANDLE_ERROR_THROW("Invalid input file " + filename + "\n");
123
124 std::string line;
125 indata >> line;
126
127 json j = json::parse(line);
128 from_json(j, *this);
129
130 logger.log("Loaded population from " + filename + " of size = "
131 + to_string(this->size()),1);
132
133 indata.close();
134}
135
137template<ProgramType T>
139{
140 size_t p = pop_size; // population size. prep_offspring slots will douple the population, adding the new expressions into the islands
141
142 // this is going to be tricky (pay attention to delta and p use)
143 size_t idx_start = std::floor(island*p/num_islands);
144 size_t idx_end = std::floor((island+1)*p/num_islands);
145
146 auto delta = idx_end - idx_start; // island size
147
148 // inserting indexes of the offspring
149 island_indexes.at(island).resize(island_indexes.at(island).size() + delta);
150 iota(
151 island_indexes.at(island).begin() + delta, island_indexes.at(island).end(),
152 p+idx_start);
153
154 // Im keeping the offspring and parents in the same population object, because we
155 // have operations that require them together (archive, hall of fame.)
156 // The downside is having to be aware that islands will create offsprings
157 // intercalated with other islands
158}
159
160template<ProgramType T>
161void Population<T>::update(vector<vector<size_t>> survivors)
162{
163
164 // this is the step that should end up cutting off half of the population
165 vector<Individual<T>> new_pop;
166 new_pop.resize(0);
167 for (int k=0; k<survivors.at(0).size(); ++k){
168 new_pop.push_back( *individuals.at(survivors.at(0).at(k)) );
169 }
170
171 // Verify we got the right number of survivors (should equal pop_size, not 2*pop_size)
172 assert(new_pop.size() == pop_size && " number of survivors is different from pop size");
173
174 for (int j=0; j<num_islands; ++j)
175 {
176
177 // need to make island point to original range
178 size_t idx_start = std::floor(j*pop_size/num_islands);
179 size_t idx_end = std::floor((j+1)*pop_size/num_islands);
180 auto delta = idx_end - idx_start;
181
182 // inserting indexes of the offspring
183 island_indexes.at(j).clear();
184 island_indexes.at(j).resize(delta);
185 iota(island_indexes.at(j).begin(), island_indexes.at(j).end(), idx_start);
186 }
187
188 assert(new_pop.size() == pop_size
189 && " update ended up with a different popsize");
190
191 this->individuals.resize(0);
192 for (auto ind : new_pop)
193 {
194 // making hard copies of the individuals
195 json ind_copy = ind;
196
197 // this will fill just half of the individuals vector
198 individuals.push_back(
199 std::make_shared<Individual<T>>(ind_copy) );
200 }
201
202 assert(individuals.size() == pop_size
203 && " number of new individuals is different from pop size");
204
205 for (int i=0; i< pop_size; ++i)
206 {
207 // second half is space to the offspring (but we dont initialize them)
208 individuals.push_back(nullptr);
209 }
210}
211
212template<ProgramType T>
214{
215 // TODO: rename it. This function does not print anything, just returns a string
216 string output = "";
217
218 for (int j=0; j<num_islands; ++j)
219 {
220 for (int k=0; k<island_indexes.at(j).size(); ++k) {
221 Individual<T>& ind = *individuals.at(island_indexes.at(j).at(k)).get();
222
223 output += "island " + to_string(j) + " ";
224 output += "index " + to_string(island_indexes.at(j).at(k)) + ": ";
225 output += ind.get_model() + " ";
226 output += ind.fitness.toString() + sep;
227 }
228 }
229 return output;
230}
231
232template<ProgramType T>
233vector<vector<size_t>> Population<T>::sorted_front(unsigned rank)
234{
235 // this is used to migration and update archive at the end of a generation. expect islands without offspring
236
237 /* Returns individuals on the Pareto front, sorted by increasign complexity. */
238 vector<vector<size_t>> pf_islands;
239 pf_islands.resize(num_islands);
240
241 for (int j=0;j<num_islands; ++j)
242 {
243 auto indices = island_indexes.at(j);
244 vector<size_t> pf;
245
246 for (int i=0; i<indices.size(); ++i)
247 {
248 auto& ind_ptr = individuals.at(indices.at(i));
249
250 // Skip nullptr individuals (offspring slots)
251 if (!ind_ptr)
252 continue;
253
254 // this assumes that rank was previously calculated. It is set in selection (ie nsga2) if the information is useful to select/survive
255 if (ind_ptr->fitness.rank == rank)
256 pf.push_back(i);
257 }
258
259 if (this->linear_complexity)
260 std::stable_sort(pf.begin(),pf.end(),SortLinearComplexity(*this));
261 else
262 std::stable_sort(pf.begin(),pf.end(),SortComplexity(*this));
263
264 auto it = std::unique(pf.begin(),pf.end(),SameFitComplexity(*this));
265
266 pf.resize(std::distance(pf.begin(),it));
267 pf_islands.at(j) = pf;
268 }
269
270 return pf_islands;
271}
272
273template<ProgramType T>
274vector<size_t> Population<T>::hall_of_fame(unsigned rank)
275{
276 // Inspired in fast nds from nsga2
277
278 // TODO: use hall of fame instead of re-implmementing this feature in
279 // archive init and update functions
280
281 // this is used to migration and update archive at the end of a generation.
282 // Thiis function expects islands without offspring
283
284 // TODO: run fast_nds here to get the pareto fronts? I think this could improve performance
285 vector<size_t> merged_islands(0);
286
287 for (int j=0;j<num_islands; ++j)
288 {
289 auto indices = island_indexes.at(j);
290 for (int i=0; i<indices.size(); ++i)
291 {
292 merged_islands.push_back(indices.at(i));
293 }
294 }
295
296 // checking if there is no dominance between different fronts
297 // (without updating their fitness objects)
298 vector<size_t> hof;
299 hof.clear();
300
301 for (int i = 0; i < merged_islands.size(); ++i) {
302 int dcount = 0;
303
304 auto p = individuals.at(merged_islands[i]);
305
306 // Skip nullptr individuals
307 if (!p)
308 continue;
309
310 for (int j = 0; j < merged_islands.size(); ++j) {
311 auto q_ptr = individuals.at(merged_islands[j]);
312
313 // Skip nullptr individuals
314 if (!q_ptr)
315 continue;
316
317 const Individual<T>& q = *q_ptr;
318
319 int compare = p->fitness.dominates(q.fitness);
320 if (compare == -1) { // q dominates p
321 //p.dcounter += 1;
322 dcount += 1;
323 }
324 }
325
326 if (dcount == 0) {
327 hof.push_back(merged_islands[i]);
328 }
329 }
330
331 if (this->linear_complexity)
332 std::stable_sort(hof.begin(),hof.end(),SortLinearComplexity(*this));
333 else
334 std::stable_sort(hof.begin(),hof.end(),SortComplexity(*this));
335
336 auto it = std::unique(hof.begin(),hof.end(),SameFitComplexity(*this));
337
338 hof.resize(std::distance(hof.begin(),it));
339
340 return hof;
341}
342
343template<ProgramType T>
345{
346 // changes where island points to by shuffling it
347
348 if (num_islands==1)
349 return; // skipping. this only work because update is fixing island indexes
350
351 // This method is not thread safe (as it is now)
352 vector<vector<size_t>> new_island_indexes;
353 new_island_indexes.resize(num_islands);
354
355 for (int island=0; island<num_islands; ++island)
356 {
357 new_island_indexes.at(island).resize(0);
358
359 auto indices = island_indexes.at(island);
360 for (unsigned int i=0; i<indices.size(); ++i)
361 {
362 if (r() < mig_prob)
363 {
364 size_t migrating_idx;
365
366 vector<int> other_islands(num_islands-1);
367 iota(other_islands.begin(), other_islands.end(), 0);
368
369 // skipping current island
370 auto it = other_islands.begin();
371 std::advance(it, island);
372 for (;it != other_islands.end(); ++it) {
373 ++(*it);
374 }
375
376 // picking other island
377 int other_island = *r.select_randomly(
378 other_islands.begin(),
379 other_islands.end());
380
381 migrating_idx = *r.select_randomly(
382 island_indexes.at(other_island).begin(),
383 island_indexes.at(other_island).end());
384
385 new_island_indexes.at(island).push_back(migrating_idx);
386 }
387 else
388 {
389 new_island_indexes.at(island).push_back(indices.at(i));
390 }
391 }
392 }
393
394 // making hard copies (so the next generation starts with islands that does not share individuals
395 // this is particularly important to avoid multiple threads assigning different rank/crowdist/dcounter
396 // or different fitness)
397
398 vector<Individual<T>> new_pop;
399 new_pop.resize(0);
400 for (int j=0; j<num_islands; ++j)
401 {
402 for (int k=0; k<new_island_indexes.at(j).size(); ++k){
403 auto& ind_ptr = individuals.at(new_island_indexes.at(j).at(k));
404
405 // Skip nullptr individuals (should not happen in migrate, but be safe)
406 if (!ind_ptr)
407 continue;
408
409 new_pop.push_back(*ind_ptr);
410 }
411
412 // need to make island point to original range
413 size_t idx_start = std::floor(j*pop_size/num_islands);
414 size_t idx_end = std::floor((j+1)*pop_size/num_islands);
415
416 auto delta = idx_end - idx_start;
417
418 assert(delta == new_island_indexes.at(j).size()
419 && " new pop has the wrong number of new individuals");
420
421 // inserting indexes of the offspring
422 island_indexes.at(j).clear();
423 island_indexes.at(j).resize(delta);
424 iota(island_indexes.at(j).begin(), island_indexes.at(j).end(), idx_start);
425 }
426
427 assert(new_pop.size() == pop_size
428 && " migration ended up with a different popsize");
429
430 this->individuals.resize(0);
431 for (auto ind : new_pop)
432 {
433 // making hard copies of the individuals
434 json ind_copy = ind;
435
436 // this will fill just half of the pop
437 individuals.push_back(
438 std::make_shared<Individual<T>>(ind_copy) );
439 }
440 for (int i=0; i< pop_size; ++i)
441 {
442 // second half is space to the offspring (but we dont initialize them)
443 individuals.push_back(nullptr);
444 }
445}
446
447} // Pop
448} // Brush
string get_model(string fmt="compact", bool pretty=false)
Definition individual.h:130
Fitness fitness
aggregate fitness score
Definition individual.h:37
bool linear_complexity
Indicates if the user set linear_complexity instead of recursive complexity.
Definition population.h:17
int size()
returns population size (the effective size of the individuals)
Definition population.h:37
void add_offspring_indexes(int island)
update individual vector size, distributing the expressions in num_islands
void save(string filename)
vector< vector< size_t > > sorted_front(unsigned rank=1)
return complexity-sorted Pareto front indices for each island
vector< std::shared_ptr< Individual< T > > > individuals
Definition population.h:19
void init(SearchSpace &ss, const Parameters &params)
initialize population of programs with a starting model and/or from file
string print_models(string sep="\n")
return population equations.
vector< vector< size_t > > island_indexes
Definition population.h:20
vector< size_t > hall_of_fame(unsigned rank=1)
void load(string filename)
void update(vector< vector< size_t > > survivors)
reduce programs to the indices in survivors. Not thread safe,as it removes elements
#define HANDLE_ERROR_THROW(err)
Definition error.h:27
void to_json(json &j, const Individual< T > &p)
Definition individual.h:202
void from_json(const json &j, Individual< T > &p)
Definition individual.h:217
string to_string(const T &value)
template function to convert objects to string for logging
Definition utils.h:369
static Logger & logger
Definition logger.h:60
bool in(const V &v, const T &i)
check if element is in vector.
Definition utils.h:192
static Rnd & r
Definition rnd.h:176
< nsga2 selection operator for getting the front
Definition bandit.cpp:4
std::string toString() const
Definition fitness.h:199
int dominates(const Fitness &b) const
set obj vector given a string of objective names
Definition fitness.cpp:55
vector< string > objectives
Definition params.h:40
check for same fitness and complexity to filter uniqueness.
Definition population.h:99
Sort each island in increasing complexity. This is not thread safe. I should set complexities of the ...
Definition population.h:78
Holds a search space, consisting of operations and terminals and functions, and methods to sample tha...