Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
population.cpp
Go to the documentation of this file.
1#include "population.h"
2
3namespace Brush{
4namespace Pop{
5
6template<ProgramType T>
8{
9 individuals.resize(0);
10 mig_prob = 0.0;
11 pop_size = 0;
12 num_islands = 0;
13}
14
15
16template<ProgramType T>
18{
19 if (new_individuals.size() != params.pop_size
20 && new_individuals.size() != 2*params.pop_size ) {
21 throw std::runtime_error("Individual vector has different number of individuals than pop_size. popsize is "+to_string(params.pop_size)+", number of individuals is " + to_string(new_individuals.size()));
22 }
23
24 this->mig_prob = params.mig_prob;
25 this->pop_size = params.pop_size;
26 this->num_islands=params.num_islands;
27
28 island_indexes.resize(num_islands);
29
30 // If the assert fails, execution stops, but for completeness, you can also throw an exception
31 size_t p = pop_size;
32
33 individuals.resize(2*p);
34 std::fill(individuals.begin(), individuals.end(), nullptr);
35
36 for (int i=0; i<num_islands; ++i)
37 {
38 size_t idx_start = std::floor(i*p/num_islands);
39 size_t idx_end = std::floor((i+1)*p/num_islands);
40
41 auto delta = idx_end - idx_start;
42
43 island_indexes.at(i).resize(delta);
44 iota(island_indexes.at(i).begin(), island_indexes.at(i).end(), idx_start);
45
46 if (new_individuals.size() == 2*params.pop_size) { // pop + offspring
47 island_indexes.at(i).resize(delta*2);
48 iota(
49 island_indexes.at(i).begin() + delta, island_indexes.at(i).end(),
50 p+idx_start);
51 }
52 };
53
54 for (int j=0; j< new_individuals.size(); j++) {
55 individuals.at(j) = std::make_shared<Individual<T>>(new_individuals.at(j));
56 }
57}
58
59template<ProgramType T>
61{
62 this->mig_prob = params.mig_prob;
63 this->pop_size = params.pop_size;
64 this->num_islands=params.num_islands;
65
66 // Tuples with start and end indexes for each island. Number of individuals
67 // in each island can slightly differ if num_islands is not a divisor of p (popsize)
68 island_indexes.resize(num_islands);
69
70 size_t p = pop_size; // population size
71
72 for (int i=0; i<num_islands; ++i)
73 {
74 size_t idx_start = std::floor(i*p/num_islands);
75 size_t idx_end = std::floor((i+1)*p/num_islands);
76
77 auto delta = idx_end - idx_start;
78
79 island_indexes.at(i).resize(delta);
80 iota(island_indexes.at(i).begin(), island_indexes.at(i).end(), idx_start);
81 };
82
83 // this calls the default constructor for the container template class
84 individuals.resize(2*p); // we will never increase or decrease the size during execution (because is not thread safe). this way, theres no need to sync between selecting and varying the population
85
86 for (int i = 0; i< p; ++i)
87 {
88 // first half will contain the initial population
89 individuals.at(i) = std::make_shared<Individual<T>>();
90 individuals.at(i)->init(ss, params);
91 individuals.at(i)->set_objectives(params.objectives);
92
93 // second half is space to the offspring (but we dont initialize them)
94 individuals.at(p+i) = nullptr;
95 }
96}
97
98template<ProgramType T>
100{
101 std::ofstream out;
102 if (!filename.empty())
103 out.open(filename);
104 else
105 out.open("population.json");
106
107 json j;
108 to_json(j, *this);
109 out << j ;
110 out.close();
111 logger.log("Saved population to file " + filename, 1);
112}
113
114template<ProgramType T>
116{
117 std::ifstream indata;
118 indata.open(filename);
119 if (!indata.good())
120 HANDLE_ERROR_THROW("Invalid input file " + filename + "\n");
121
122 std::string line;
123 indata >> line;
124
125 json j = json::parse(line);
126 from_json(j, *this);
127
128 logger.log("Loaded population from " + filename + " of size = "
129 + to_string(this->size()),1);
130
131 indata.close();
132}
133
135template<ProgramType T>
137{
138 size_t p = pop_size; // population size. prep_offspring slots will douple the population, adding the new expressions into the islands
139
140 // this is going to be tricky (pay attention to delta and p use)
141 size_t idx_start = std::floor(island*p/num_islands);
142 size_t idx_end = std::floor((island+1)*p/num_islands);
143
144 auto delta = idx_end - idx_start; // island size
145
146 // inserting indexes of the offspring
147 island_indexes.at(island).resize(island_indexes.at(island).size() + delta);
148 iota(
149 island_indexes.at(island).begin() + delta, island_indexes.at(island).end(),
150 p+idx_start);
151
152 // Im keeping the offspring and parents in the same population object, because we
153 // have operations that require them together (archive, hall of fame.)
154 // The downside is having to be aware that islands will create offsprings
155 // intercalated with other islands
156}
157
158template<ProgramType T>
159void Population<T>::update(vector<vector<size_t>> survivors)
160{
161 // this is the step that should end up cutting off half of the population
162 vector<Individual<T>> new_pop;
163 new_pop.resize(0);
164 for (int j=0; j<num_islands; ++j)
165 {
166 for (int k=0; k<survivors.at(j).size(); ++k){
167 new_pop.push_back(
168 *individuals.at(survivors.at(j).at(k)) );
169 }
170
171 // need to make island point to original range
172 size_t idx_start = std::floor(j*pop_size/num_islands);
173 size_t idx_end = std::floor((j+1)*pop_size/num_islands);
174
175 auto delta = idx_end - idx_start;
176
177 assert(delta == survivors.at(j).size()
178 && " migration ended up with a different popsize");
179
180 // inserting indexes of the offspring
181 island_indexes.at(j).clear();
182 island_indexes.at(j).resize(delta);
183 iota(island_indexes.at(j).begin(), island_indexes.at(j).end(), idx_start);
184 }
185
186 assert(new_pop.size() == pop_size
187 && " update ended up with a different popsize");
188
189 this->individuals.resize(0);
190 for (auto ind : new_pop)
191 {
192 // making hard copies of the individuals
193 json ind_copy = ind;
194
195 // this will fill just half of the pop
196 individuals.push_back(
197 std::make_shared<Individual<T>>(ind_copy) );
198 }
199
200 assert(individuals.size() == pop_size
201 && " number of new individuals is different from pop size");
202
203 for (int i=0; i< pop_size; ++i)
204 {
205 // second half is space to the offspring (but we dont initialize them)
206 individuals.push_back(nullptr);
207 }
208}
209
210template<ProgramType T>
212{
213 // not printing the island each individual belongs to
214 string output = "";
215
216 for (int j=0; j<num_islands; ++j)
217 {
218 output += "island " + to_string(j) + ":\n";
219
220 for (int k=0; k<island_indexes.at(j).size(); ++k) {
221 output += "ind index " + to_string(k);
222 output += " pos " + to_string(island_indexes.at(j).at(k)) + ": ";
223 Individual<T>& ind = *individuals.at(island_indexes.at(j).at(k)).get();
224 output += ind.get_model() + sep;
225 }
226 }
227 return output;
228}
229
230template<ProgramType T>
231vector<vector<size_t>> Population<T>::sorted_front(unsigned rank)
232{
233 // this is used to migration and update archive at the end of a generation. expect islands without offspring
234
235 /* Returns individuals on the Pareto front, sorted by increasign complexity. */
236 vector<vector<size_t>> pf_islands;
237 pf_islands.resize(num_islands);
238
239 for (int j=0;j<num_islands; ++j)
240 {
241 auto indices = island_indexes.at(j);
242 vector<size_t> pf;
243
244 for (int i=0; i<indices.size(); ++i)
245 {
246 // this assumes that rank was previously calculated. It is set in selection (ie nsga2) if the information is useful to select/survive
247 if (individuals.at(indices.at(i))->fitness.rank == rank)
248 pf.push_back(i);
249 }
250
251 std::sort(pf.begin(),pf.end(),SortComplexity(*this));
252 auto it = std::unique(pf.begin(),pf.end(),SameFitComplexity(*this));
253
254 pf.resize(std::distance(pf.begin(),it));
255 pf_islands.at(j) = pf;
256 }
257
258 return pf_islands;
259}
260
261template<ProgramType T>
262vector<size_t> Population<T>::hall_of_fame(unsigned rank)
263{
264 // TODO: hall of fame should unify all pareto fronts by doing a new fast_nds.
265 // TODO: use hall of fame instead of re-implmementing this feature in
266 // archive init and update functions
267
268 // this is used to migration and update archive at the end of a generation.
269 // Thiis function expects islands without offspring
270
271 vector<size_t> pf(0);
272
273 for (int j=0;j<num_islands; ++j)
274 {
275 auto indices = island_indexes.at(j);
276 for (int i=0; i<indices.size(); ++i)
277 {
278 if (individuals.at(indices.at(i))->fitness.rank == rank)
279 pf.push_back(indices.at(i));
280 }
281 }
282 std::sort(pf.begin(),pf.end(),SortComplexity(*this));
283
284 auto it = std::unique(pf.begin(),pf.end(),SameFitComplexity(*this));
285
286 pf.resize(std::distance(pf.begin(),it));
287
288 return pf;
289}
290
291template<ProgramType T>
293{
294 // changes where island points to by shuffling it
295
296 if (num_islands==1)
297 return; // skipping. this only work because update is fixing island indexes
298
299 // This method is not thread safe (as it is now)
300 vector<vector<size_t>> new_island_indexes;
301 new_island_indexes.resize(num_islands);
302
303 // std::cout << "Looping" << std::endl;
304 for (int island=0; island<num_islands; ++island)
305 {
306 new_island_indexes.at(island).resize(0);
307
308 auto indices = island_indexes.at(island);
309 for (unsigned int i=0; i<indices.size(); ++i)
310 {
311 if (r() < mig_prob)
312 {
313 size_t migrating_idx;
314
315 vector<int> other_islands(num_islands-1);
316 iota(other_islands.begin(), other_islands.end(), 0);
317
318 // skipping current island
319 auto it = other_islands.begin();
320 std::advance(it, island);
321 for (;it != other_islands.end(); ++it) {
322 ++(*it);
323 }
324
325 // picking other island
327 other_islands.begin(),
328 other_islands.end());
329
331 island_indexes.at(other_island).begin(),
332 island_indexes.at(other_island).end());
333
335 }
336 else
337 {
338 new_island_indexes.at(island).push_back(indices.at(i));
339 }
340 }
341 }
342
343 // making hard copies (so the next generation starts with islands that does not share individuals
344 // this is particularly important to avoid multiple threads assigning different rank/crowdist/dcounter
345 // or different fitness)
346
347 // std::cout << "starting to consolidate pop" << std::endl;
348 vector<Individual<T>> new_pop;
349 new_pop.resize(0);
350 for (int j=0; j<num_islands; ++j)
351 {
352 for (int k=0; k<new_island_indexes.at(j).size(); ++k){
353 new_pop.push_back(
354 *individuals.at(new_island_indexes.at(j).at(k)) );
355 }
356
357 // need to make island point to original range
358 size_t idx_start = std::floor(j*pop_size/num_islands);
359 size_t idx_end = std::floor((j+1)*pop_size/num_islands);
360
361 auto delta = idx_end - idx_start;
362
363 assert(delta == new_island_indexes.at(j).size()
364 && " new pop has the wrong number of new individuals");
365
366 // inserting indexes of the offspring
367 island_indexes.at(j).clear();
368 island_indexes.at(j).resize(delta);
369 iota(island_indexes.at(j).begin(), island_indexes.at(j).end(), idx_start);
370 }
371
372 assert(new_pop.size() == pop_size
373 && " migration ended up with a different popsize");
374
375 this->individuals.resize(0);
376 for (auto ind : new_pop)
377 {
378 // making hard copies of the individuals
379 json ind_copy = ind;
380
381 // this will fill just half of the pop
382 individuals.push_back(
383 std::make_shared<Individual<T>>(ind_copy) );
384 }
385 for (int i=0; i< pop_size; ++i)
386 {
387 // second half is space to the offspring (but we dont initialize them)
388 individuals.push_back(nullptr);
389 }
390}
391
392} // Pop
393} // Brush
void bind_engine(py::module &m, string name)
string get_model(string fmt="compact", bool pretty=false)
Definition individual.h:91
void add_offspring_indexes(int island)
update individual vector size, distributing the expressions in num_islands
void save(string filename)
vector< vector< size_t > > sorted_front(unsigned rank=1)
return complexity-sorted Pareto front indices for each island
void init(SearchSpace &ss, const Parameters &params)
initialize population of programs with a starting model and/or from file
string print_models(string sep="\n")
return population equations.
vector< size_t > hall_of_fame(unsigned rank=1)
void load(string filename)
void update(vector< vector< size_t > > survivors)
reduce programs to the indices in survivors. Not thread safe,as it removes elements
string log(string m, int v, string sep="\n") const
Prints a log message with verbosity control.
Definition logger.cpp:50
Iter select_randomly(Iter start, Iter end)
Definition rnd.h:61
#define HANDLE_ERROR_THROW(err)
Definition error.h:27
void to_json(json &j, const Individual< T > &p)
Definition individual.h:150
void from_json(const json &j, Individual< T > &p)
Definition individual.h:162
string to_string(const T &value)
template function to convert objects to string for logging
Definition utils.h:369
static Logger & logger
Definition logger.h:60
static Rnd & r
Definition rnd.h:174
< nsga2 selection operator for getting the front
Definition data.cpp:12
vector< string > objectives
Definition params.h:38
check for same fitness and complexity to filter uniqueness.
Definition population.h:76
Sort each island in increasing complexity. This is not thread safe. I should set complexities of the ...
Definition population.h:65
Holds a search space, consisting of operations and terminals and functions, and methods to sample tha...