Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
population.cpp
Go to the documentation of this file.
1#include "population.h"
2
3namespace Brush{
4namespace Pop{
5
6template<ProgramType T>
8{
9 individuals.resize(0);
10 mig_prob = 0.0;
11 pop_size = 0;
12 num_islands = 0;
13 linear_complexity = false;
14}
15
16
17template<ProgramType T>
18void Population<T>::init(vector<Individual<T>>& new_individuals, const Parameters& params)
19{
20 if (new_individuals.size() != params.pop_size
21 && new_individuals.size() != 2*params.pop_size ) {
22 throw std::runtime_error("Individual vector has different number of individuals than pop_size. popsize is "+to_string(params.pop_size)+", number of individuals is " + to_string(new_individuals.size()));
23 }
24
25 this->mig_prob = params.mig_prob;
26 this->pop_size = params.pop_size;
27 this->num_islands=params.num_islands;
28 this->linear_complexity = in(params.objectives, std::string("linear_complexity"));
29
31
32 // If the assert fails, execution stops, but for completeness, you can also throw an exception
33 size_t p = pop_size;
34
35 individuals.resize(2*p);
36 std::fill(individuals.begin(), individuals.end(), nullptr);
37
38 for (int i=0; i<num_islands; ++i)
39 {
40 size_t idx_start = std::floor(i*p/num_islands);
41 size_t idx_end = std::floor((i+1)*p/num_islands);
42
43 auto delta = idx_end - idx_start;
44
45 island_indexes.at(i).resize(delta);
46 iota(island_indexes.at(i).begin(), island_indexes.at(i).end(), idx_start);
47
48 if (new_individuals.size() == 2*params.pop_size) { // pop + offspring
49 island_indexes.at(i).resize(delta*2);
50 iota(
51 island_indexes.at(i).begin() + delta, island_indexes.at(i).end(),
52 p+idx_start);
53 }
54 };
55
56 for (int j=0; j< new_individuals.size(); j++) {
57 individuals.at(j) = std::make_shared<Individual<T>>(new_individuals.at(j));
58 }
59}
60
61template<ProgramType T>
63{
64 this->mig_prob = params.mig_prob;
65 this->pop_size = params.pop_size;
66 this->num_islands=params.num_islands;
67 this->linear_complexity = in(params.objectives, std::string("linear_complexity"));
68
69 // Tuples with start and end indexes for each island. Number of individuals
70 // in each island can slightly differ if num_islands is not a divisor of p (popsize)
72
73 size_t p = pop_size; // population size
74
75 for (int i=0; i<num_islands; ++i)
76 {
77 size_t idx_start = std::floor(i*p/num_islands);
78 size_t idx_end = std::floor((i+1)*p/num_islands);
79
80 auto delta = idx_end - idx_start;
81
82 island_indexes.at(i).resize(delta);
83 iota(island_indexes.at(i).begin(), island_indexes.at(i).end(), idx_start);
84 };
85
86 // this calls the default constructor for the container template class
87 individuals.resize(2*p); // we will never increase or decrease the size during execution (because is not thread safe). this way, theres no need to sync between selecting and varying the population
88
89 for (int i = 0; i< p; ++i)
90 {
91 // first half will contain the initial population
92 individuals.at(i) = std::make_shared<Individual<T>>();
93 individuals.at(i)->init(ss, params);
94
95 // second half is space to the offspring (but we dont initialize them)
96 individuals.at(p+i) = nullptr;
97 }
98}
99
100template<ProgramType T>
101void Population<T>::save(string filename)
102{
103 std::ofstream out;
104 if (!filename.empty())
105 out.open(filename);
106 else
107 out.open("population.json");
108
109 json j;
110 to_json(j, *this);
111 out << j ;
112 out.close();
113 logger.log("Saved population to file " + filename, 1);
114}
115
116template<ProgramType T>
117void Population<T>::load(string filename)
118{
119 std::ifstream indata;
120 indata.open(filename);
121 if (!indata.good())
122 HANDLE_ERROR_THROW("Invalid input file " + filename + "\n");
123
124 std::string line;
125 indata >> line;
126
127 json j = json::parse(line);
128 from_json(j, *this);
129
130 logger.log("Loaded population from " + filename + " of size = "
131 + to_string(this->size()),1);
132
133 indata.close();
134}
135
137template<ProgramType T>
139{
140 size_t p = pop_size; // population size. prep_offspring slots will douple the population, adding the new expressions into the islands
141
142 // this is going to be tricky (pay attention to delta and p use)
143 size_t idx_start = std::floor(island*p/num_islands);
144 size_t idx_end = std::floor((island+1)*p/num_islands);
145
146 auto delta = idx_end - idx_start; // island size
147
148 // inserting indexes of the offspring
149 island_indexes.at(island).resize(island_indexes.at(island).size() + delta);
150 iota(
151 island_indexes.at(island).begin() + delta, island_indexes.at(island).end(),
152 p+idx_start);
153
154 // Im keeping the offspring and parents in the same population object, because we
155 // have operations that require them together (archive, hall of fame.)
156 // The downside is having to be aware that islands will create offsprings
157 // intercalated with other islands
158}
159
160template<ProgramType T>
161void Population<T>::update(vector<vector<size_t>> survivors)
162{
163 // cout << "[Population::update] Starting update with "
164 // << survivors.at(0).size() << " individuals.\n";
165
166 // this is the step that should end up cutting off half of the population
167 vector<Individual<T>> new_pop;
168 new_pop.resize(0);
169 for (int k=0; k<survivors.at(0).size(); ++k){
170 // cout << " - Adding survivor idx " << survivors.at(0).at(k) << endl;
171 new_pop.push_back( *individuals.at(survivors.at(0).at(k)) );
172 }
173
174 for (int j=0; j<num_islands; ++j)
175 {
176 // cout << " Processing island " << j << endl;
177
178 // need to make island point to original range
179 size_t idx_start = std::floor(j*pop_size/num_islands);
180 size_t idx_end = std::floor((j+1)*pop_size/num_islands);
181 auto delta = idx_end - idx_start;
182
183 // cout << " Island " << j << " index range [" << idx_start << ", " << idx_end << ") -> delta = " << delta << endl;
184
185 // assert(delta == survivors.at(0).size()
186 // && " migration ended up with a different popsize");
187
188 // inserting indexes of the offspring
189 island_indexes.at(j).clear();
190 island_indexes.at(j).resize(delta);
191 iota(island_indexes.at(j).begin(), island_indexes.at(j).end(), idx_start);
192 }
193
194 assert(new_pop.size() == pop_size
195 && " update ended up with a different popsize");
196
197 this->individuals.resize(0);
198 for (auto ind : new_pop)
199 {
200 // making hard copies of the individuals
201 json ind_copy = ind;
202
203 // this will fill just half of the individuals vector
204 individuals.push_back(
205 std::make_shared<Individual<T>>(ind_copy) );
206 }
207
208 assert(individuals.size() == pop_size
209 && " number of new individuals is different from pop size");
210
211 for (int i=0; i< pop_size; ++i)
212 {
213 // second half is space to the offspring (but we dont initialize them)
214 individuals.push_back(nullptr);
215 }
216}
217
218template<ProgramType T>
220{
221 // TODO: rename it. This function does not print anything, just returns a string
222 string output = "";
223
224 for (int j=0; j<num_islands; ++j)
225 {
226 for (int k=0; k<island_indexes.at(j).size(); ++k) {
227 Individual<T>& ind = *individuals.at(island_indexes.at(j).at(k)).get();
228
229 output += "island " + to_string(j) + " ";
230 output += "index " + to_string(island_indexes.at(j).at(k)) + ": ";
231 output += ind.get_model() + " ";
232 output += ind.fitness.toString() + sep;
233 }
234 }
235 return output;
236}
237
238template<ProgramType T>
239vector<vector<size_t>> Population<T>::sorted_front(unsigned rank)
240{
241 // this is used to migration and update archive at the end of a generation. expect islands without offspring
242
243 /* Returns individuals on the Pareto front, sorted by increasign complexity. */
244 vector<vector<size_t>> pf_islands;
245 pf_islands.resize(num_islands);
246
247 for (int j=0;j<num_islands; ++j)
248 {
249 auto indices = island_indexes.at(j);
250 vector<size_t> pf;
251
252 for (int i=0; i<indices.size(); ++i)
253 {
254 // this assumes that rank was previously calculated. It is set in selection (ie nsga2) if the information is useful to select/survive
255 if (individuals.at(indices.at(i))->fitness.rank == rank)
256 pf.push_back(i);
257 }
258
259 if (this->linear_complexity)
260 std::sort(pf.begin(),pf.end(),SortLinearComplexity(*this));
261 else
262 std::sort(pf.begin(),pf.end(),SortComplexity(*this));
263
264 auto it = std::unique(pf.begin(),pf.end(),SameFitComplexity(*this));
265
266 pf.resize(std::distance(pf.begin(),it));
267 pf_islands.at(j) = pf;
268 }
269
270 return pf_islands;
271}
272
273template<ProgramType T>
274vector<size_t> Population<T>::hall_of_fame(unsigned rank)
275{
276 // Inspired in fast nds from nsga2
277
278 // TODO: use hall of fame instead of re-implmementing this feature in
279 // archive init and update functions
280
281 // this is used to migration and update archive at the end of a generation.
282 // Thiis function expects islands without offspring
283
284 // TODO: run fast_nds here to get the pareto fronts? I think this could improve performance
285 vector<size_t> merged_islands(0);
286
287 for (int j=0;j<num_islands; ++j)
288 {
289 auto indices = island_indexes.at(j);
290 for (int i=0; i<indices.size(); ++i)
291 {
292 merged_islands.push_back(indices.at(i));
293 }
294 }
295
296 // checking if there is no dominance between different fronts
297 // (without updating their fitness objects)
298 vector<size_t> hof;
299 hof.clear();
300
301 for (int i = 0; i < merged_islands.size(); ++i) {
302 int dcount = 0;
303
304 auto p = individuals.at(merged_islands[i]);
305
306 for (int j = 0; j < merged_islands.size(); ++j) {
307 const Individual<T>& q = (*individuals.at(merged_islands[j]));
308
309 int compare = p->fitness.dominates(q.fitness);
310 if (compare == -1) { // q dominates p
311 //p.dcounter += 1;
312 dcount += 1;
313 }
314 }
315
316 if (dcount == 0) {
317 hof.push_back(merged_islands[i]);
318 }
319 }
320
321 if (this->linear_complexity)
322 std::sort(hof.begin(),hof.end(),SortLinearComplexity(*this));
323 else
324 std::sort(hof.begin(),hof.end(),SortComplexity(*this));
325
326 auto it = std::unique(hof.begin(),hof.end(),SameFitComplexity(*this));
327
328 hof.resize(std::distance(hof.begin(),it));
329
330 return hof;
331}
332
333template<ProgramType T>
335{
336 // changes where island points to by shuffling it
337
338 if (num_islands==1)
339 return; // skipping. this only work because update is fixing island indexes
340
341 // This method is not thread safe (as it is now)
342 vector<vector<size_t>> new_island_indexes;
343 new_island_indexes.resize(num_islands);
344
345 for (int island=0; island<num_islands; ++island)
346 {
347 new_island_indexes.at(island).resize(0);
348
349 auto indices = island_indexes.at(island);
350 for (unsigned int i=0; i<indices.size(); ++i)
351 {
352 if (r() < mig_prob)
353 {
354 size_t migrating_idx;
355
356 vector<int> other_islands(num_islands-1);
357 iota(other_islands.begin(), other_islands.end(), 0);
358
359 // skipping current island
360 auto it = other_islands.begin();
361 std::advance(it, island);
362 for (;it != other_islands.end(); ++it) {
363 ++(*it);
364 }
365
366 // picking other island
367 int other_island = *r.select_randomly(
368 other_islands.begin(),
369 other_islands.end());
370
371 migrating_idx = *r.select_randomly(
372 island_indexes.at(other_island).begin(),
373 island_indexes.at(other_island).end());
374
375 new_island_indexes.at(island).push_back(migrating_idx);
376 }
377 else
378 {
379 new_island_indexes.at(island).push_back(indices.at(i));
380 }
381 }
382 }
383
384 // making hard copies (so the next generation starts with islands that does not share individuals
385 // this is particularly important to avoid multiple threads assigning different rank/crowdist/dcounter
386 // or different fitness)
387
388 vector<Individual<T>> new_pop;
389 new_pop.resize(0);
390 for (int j=0; j<num_islands; ++j)
391 {
392 for (int k=0; k<new_island_indexes.at(j).size(); ++k){
393 new_pop.push_back(
394 *individuals.at(new_island_indexes.at(j).at(k)) );
395 }
396
397 // need to make island point to original range
398 size_t idx_start = std::floor(j*pop_size/num_islands);
399 size_t idx_end = std::floor((j+1)*pop_size/num_islands);
400
401 auto delta = idx_end - idx_start;
402
403 assert(delta == new_island_indexes.at(j).size()
404 && " new pop has the wrong number of new individuals");
405
406 // inserting indexes of the offspring
407 island_indexes.at(j).clear();
408 island_indexes.at(j).resize(delta);
409 iota(island_indexes.at(j).begin(), island_indexes.at(j).end(), idx_start);
410 }
411
412 assert(new_pop.size() == pop_size
413 && " migration ended up with a different popsize");
414
415 this->individuals.resize(0);
416 for (auto ind : new_pop)
417 {
418 // making hard copies of the individuals
419 json ind_copy = ind;
420
421 // this will fill just half of the pop
422 individuals.push_back(
423 std::make_shared<Individual<T>>(ind_copy) );
424 }
425 for (int i=0; i< pop_size; ++i)
426 {
427 // second half is space to the offspring (but we dont initialize them)
428 individuals.push_back(nullptr);
429 }
430}
431
432} // Pop
433} // Brush
string get_model(string fmt="compact", bool pretty=false)
Definition individual.h:104
Fitness fitness
aggregate fitness score
Definition individual.h:37
bool linear_complexity
Indicates if the user set linear_complexity instead of recursive complexity.
Definition population.h:17
int size()
returns population size (the effective size of the individuals)
Definition population.h:37
void add_offspring_indexes(int island)
update individual vector size, distributing the expressions in num_islands
void save(string filename)
vector< vector< size_t > > sorted_front(unsigned rank=1)
return complexity-sorted Pareto front indices for each island
vector< std::shared_ptr< Individual< T > > > individuals
Definition population.h:19
void init(SearchSpace &ss, const Parameters &params)
initialize population of programs with a starting model and/or from file
string print_models(string sep="\n")
return population equations.
vector< vector< size_t > > island_indexes
Definition population.h:20
vector< size_t > hall_of_fame(unsigned rank=1)
void load(string filename)
void update(vector< vector< size_t > > survivors)
reduce programs to the indices in survivors. Not thread safe,as it removes elements
#define HANDLE_ERROR_THROW(err)
Definition error.h:27
void to_json(json &j, const Individual< T > &p)
Definition individual.h:175
void from_json(const json &j, Individual< T > &p)
Definition individual.h:189
string to_string(const T &value)
template function to convert objects to string for logging
Definition utils.h:369
static Logger & logger
Definition logger.h:60
bool in(const V &v, const T &i)
check if element is in vector.
Definition utils.h:192
static Rnd & r
Definition rnd.h:174
< nsga2 selection operator for getting the front
Definition bandit.cpp:4
std::string toString() const
Definition fitness.h:204
int dominates(const Fitness &b) const
set obj vector given a string of objective names
Definition fitness.cpp:43
vector< string > objectives
Definition params.h:40
check for same fitness and complexity to filter uniqueness.
Definition population.h:87
Sort each island in increasing complexity. This is not thread safe. I should set complexities of the ...
Definition population.h:66
Holds a search space, consisting of operations and terminals and functions, and methods to sample tha...