Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
variation.cpp
Go to the documentation of this file.
1#include "variation.h"
2
3namespace Brush {
4namespace Var {
5
13{
14public:
15 static auto mutate(tree<Node>& Tree, Iter spot, const SearchSpace& SS,
16 const Parameters& params)
17 {
18 // get_node_like will sample a similar node based on node_map_weights or
19 // terminal_weights, and maybe will return a Node.
21
22 if (!newNode) // overload to check if newNode == nullopt
23 return false;
24
25 // if optional contains a Node, we access its contained value
26 Tree.replace(spot, *newNode);
27
28 return true;
29 }
30};
31
39{
40public:
41 static auto find_spots(tree<Node>& Tree, const SearchSpace& SS,
42 const Parameters& params)
43 {
44 vector<float> weights;
45
46 if (Tree.size() < params.get_max_size()) {
47 Iter iter = Tree.begin();
48 std::transform(Tree.begin(), Tree.end(), std::back_inserter(weights),
49 [&](const auto& n){
50 size_t d = 1+Tree.depth(iter);
51 std::advance(iter, 1);
52
53 // check if SS holds an operator to avoid failing `check` in sample_op_with_arg
54 if ((d >= params.get_max_depth())
55 || (SS.node_map.find(n.ret_type) == SS.node_map.end())) {
56 return 0.0f;
57 }
58 else {
59 return n.get_prob_change();
60 }
61 });
62 }
63 else {
64 // fill the vector with zeros, since we're already at max_size
65 weights.resize(Tree.size());
66 std::fill(weights.begin(), weights.end(), 0.0f);
67 }
68
69 return weights;
70 }
71
72 static auto mutate(tree<Node>& Tree, Iter spot, const SearchSpace& SS,
73 const Parameters& params)
74 {
75 auto spot_type = spot.node->data.ret_type;
76
77 // pick a random compatible node to insert (with probabilities given by
78 // node_map_weights). The `-1` represents the node being inserted.
79 // Ideally, it should always find at least one match (the same node
80 // used as a reference when calling the function). However, we have a
81 // size restriction, which will be relaxed here (just as it is in the PTC2
82 // algorithm). This mutation can create a new expression that exceeds the
83 // maximum size by the highest arity among the operators.
84 std::optional<Node> n = SS.sample_op_with_arg(
85 spot_type, spot_type, true, params.max_size-Tree.size()-1);
86
87 if (!n) // there is no operator with compatible arguments
88 return false;
89
90 // make node n wrap the subtree at the chosen spot
91 auto parent_node = Tree.wrap(spot, *n);
92
93 // now fill the arguments of n appropriately
94 bool spot_filled = false;
95 for (auto a: (*n).arg_types)
96 {
97 if (spot_filled)
98 {
99 // if spot is in its child position, append children.
100 auto opt = SS.sample_terminal(a);
101
102 if (!opt)
103 return false;
104
105 Tree.append_child(parent_node, opt.value());
106 }
107 // if types match, treat this spot as filled by the spot node
108 else if (a == spot_type)
109 spot_filled = true;
110 // otherwise, add siblings before spot node
111 else {
112 auto opt = SS.sample_terminal(a);
113
114 if (!opt)
115 return false;
116
117 Tree.insert(spot, opt.value());
118 }
119 }
120
121 return true;
122 }
123};
124
132{
133public:
134 static auto mutate(tree<Node>& Tree, Iter spot, const SearchSpace& SS,
135 const Parameters& params)
136 {
137 // sample_terminal will sample based on terminal_weights. If it succeeds,
138 // then the new terminal will be in `opt.value()`
139 auto opt = SS.sample_terminal(spot.node->data.ret_type);
140
141 if (!opt) // there is no terminal with compatible arguments
142 return false;
143
144 Tree.erase_children(spot);
145
146 Tree.replace(spot, opt.value());
147
148 return true;
149 }
150};
151
159{
160public:
161 static auto find_spots(tree<Node>& Tree, const SearchSpace& SS,
162 const Parameters& params)
163 {
164 vector<float> weights(Tree.size());
165
166 if (Tree.size() < params.max_size) {
167 std::transform(Tree.begin(), Tree.end(), weights.begin(),
168 [&](const auto& n){
169 // some nodetypes must always have a weight
170 if (Is<NodeType::OffsetSum>(n.node_type))
171 return 0.0f;
172
173 // only weighted nodes can be toggled off
174 if (!n.get_is_weighted()
175 && IsWeighable(n.ret_type))
176 {
177 return n.get_prob_change();
178 }
179 else
180 return 0.0f;
181 });
182 }
183 else {
184 // fill the vector with zeros, since we're already at max_size
185 std::fill(weights.begin(), weights.end(), 0.0f);
186 }
187
188 return weights;
189 }
190
191 static auto mutate(tree<Node>& Tree, Iter spot, const SearchSpace& SS,
192 const Parameters& params)
193 {
194 if (spot.node->data.get_is_weighted()==true // cant turn on whats already on
195 || !IsWeighable(spot.node->data.ret_type)) // does not accept weights (e.g. boolean)
196 return false; // false indicates that mutation failed and should return std::nullopt
197
198 spot.node->data.set_is_weighted(true);
199 return true;
200 }
201};
202
210{
211public:
212 static auto find_spots(tree<Node>& Tree, const SearchSpace& SS,
213 const Parameters& params)
214 {
215 vector<float> weights(Tree.size());
216
217 std::transform(Tree.begin(), Tree.end(), weights.begin(),
218 [&](const auto& n){
219 // some nodetypes must always have a weight
220 if (Is<NodeType::OffsetSum>(n.node_type))
221 return 0.0f;
222
223 if (n.get_is_weighted()
224 && IsWeighable(n.ret_type))
225 return n.get_prob_change();
226 else
227 return 0.0f;
228 });
229
230 return weights;
231 }
232
233 static auto mutate(tree<Node>& Tree, Iter spot, const SearchSpace& SS,
234 const Parameters& params)
235 {
236 // cout << "toggle_weight_off mutation\n";
237
238 if (spot.node->data.get_is_weighted()==false)
239 return false;
240
241 spot.node->data.set_is_weighted(false);
242 return true;
243 }
244};
245
253{
254public:
255 static auto find_spots(tree<Node>& Tree, const SearchSpace& SS,
256 const Parameters& params)
257 {
258 vector<float> weights;
259
260 auto node_map = SS.node_map;
261
262 if (Tree.size() < params.max_size) {
263 Iter iter = Tree.begin();
264 std::transform(Tree.begin(), Tree.end(), std::back_inserter(weights),
265 [&](const auto& n){
266 size_t d = 1+Tree.depth(iter);
267 std::advance(iter, 1);
268
269 // we need to make sure there's some node to start the subtree
270 if ((d >= params.max_depth)
271 || (SS.node_map.find(n.ret_type) == SS.node_map.end())
272 || (SS.node_map.find(n.ret_type) == SS.node_map.end()) )
273 return 0.0f;
274 else
275 return n.get_prob_change();
276 });
277 }
278 else {
279 weights.resize(Tree.size());
280 std::fill(weights.begin(), weights.end(), 0.0f);
281 }
282
283 return weights;
284 }
285
286 static auto mutate(tree<Node>& Tree, Iter spot, const SearchSpace& SS,
287 const Parameters& params)
288 {
289 // check if we exceeded the size/depth constrains (without subtracting,
290 // to avoid overflow cases if the user sets max_size smaller than arity
291 // of smallest operator. The overflow would happen when calculating d and
292 // s in the following lines, to choose the PTC2 limits)
293 if ( params.max_size <= (Tree.size() - Tree.size(spot))
294 || params.max_depth <= Tree.depth(spot) )
295 return false;
296
297 auto spot_type = spot.node->data.ret_type;
298
299 // d and s must be compatible with PTC2 --- they should be based on
300 // tree structure, not program structure
301 size_t d = params.max_depth - Tree.depth(spot);
302 size_t s = params.max_size - (Tree.size() - Tree.size(spot));
303
304 s = r.rnd_int(1, s);
305
306 // sample subtree uses PTC2, which operates on depth and size of the tree<Node>
307 // (and not on the program!). we shoudn't care for weights here
308 auto subtree = SS.sample_subtree(spot.node->data, d, s);
309
310 if (!subtree) // there is no terminal with compatible arguments
311 return false;
312
313 // if optional contains a Node, we access its contained value
314 Tree.erase_children(spot);
315 Tree.replace(spot, subtree.value().begin());
316
317 return true;
318 }
319};
320
344template<Brush::ProgramType T>
345std::optional<Individual<T>> Variation<T>::cross(
346 const Individual<T>& mom, const Individual<T>& dad)
347{
348 /* subtree crossover between this and other, producing new Program */
349 // choose location by weighted sampling of program
350 // TODO: why doesn't this copy the search space reference to child?
351 Program<T> child(mom.program);
352
353 // pick a subtree to replace
354 vector<float> child_weights(child.Tree.size());
355 auto child_iter = child.Tree.begin();
356 std::transform(child.Tree.begin(), child.Tree.end(), child_weights.begin(),
357 [&](const auto& n){
358 auto s_at = child.size_at(child_iter);
359 auto d_at = child.depth_to_reach(child_iter);
360
361 std::advance(child_iter, 1);
362
363 if (s_at<parameters.max_size && d_at<parameters.max_depth)
364 return n.get_prob_change();
365 else
366 return 0.0f;
367 }
368 );
369
370 if (std::all_of(child_weights.begin(), child_weights.end(), [](const auto& w) {
371 return w<=0.0;
372 }))
373 { // There is no spot that has a probability to be selected
374 return std::nullopt;
375 }
376
377 // pick a subtree to insert. Selection is based on other_weights
378 Program<T> other(dad.program);
379
380 int attempts = 0;
381 while (++attempts <= 3)
382 {
383 auto child_spot = r.select_randomly(child.Tree.begin(),
384 child.Tree.end(),
385 child_weights.begin(),
386 child_weights.end()
387 );
388
389 auto child_ret_type = child_spot.node->data.ret_type;
390
391 auto allowed_size = parameters.max_size -
392 ( child.size() - child.size_at(child_spot) );
393 auto allowed_depth = parameters.max_depth -
394 ( child.depth_to_reach(child_spot) );
395
396 vector<float> other_weights(other.Tree.size());
397
398 // iterator to get the size of subtrees inside transform
399 auto other_iter = other.Tree.begin();
400
401 // lambda function to check feasibility of solution and increment the iterator
402 const auto check_and_incrm = [other, &other_iter, allowed_size, allowed_depth]() -> bool {
403 int s = other.size_at( other_iter );
404 int d = other.depth_at( other_iter );
405
406 std::advance(other_iter, 1);
407 return (s <= allowed_size) && (d <= allowed_depth);
408 };
409
410 std::transform(other.Tree.begin(), other.Tree.end(),
411 other_weights.begin(),
412 [child_ret_type, check_and_incrm](const auto& n){
413 // need to pick a node that has a matching output type to the child_spot.
414 // also need to check if swaping this node wouldn't exceed max_size
415 if (check_and_incrm() && (n.ret_type == child_ret_type))
416 return n.get_prob_change();
417 else
418 // setting the weight to zero to indicate a non-feasible crossover point
419 return 0.0f;
420 }
421 );
422
423 bool matching_spots_found = false;
424 for (const auto& w: other_weights)
425 {
426 // we found at least one weight that is non-zero
427 matching_spots_found = w > 0.0;
428
431 other.Tree.begin(),
432 other.Tree.end(),
433 other_weights.begin(),
434 other_weights.end()
435 );
436
437 // fmt::print("other_spot : {}\n",other_spot.node->data);
438 // swap subtrees at child_spot and other_spot
439 child.Tree.move_ontop(child_spot, other_spot);
440
442 ind.set_objectives(mom.get_objectives()); // it will have an invalid fitness
443
444 return ind;
445 }
446 }
447 }
448
449 return std::nullopt;
450}
451
485template<Brush::ProgramType T>
486std::optional<Individual<T>> Variation<T>::mutate(const Individual<T>& parent)
487{
488 auto options = parameters.mutation_probs;
489
490 bool all_zero = true;
491 for (auto &it : parameters.mutation_probs) {
492 if (it.second > 0.0) {
493 all_zero = false;
494 break;
495 }
496 }
497
498 if (all_zero)
499 { // No mutation can be successfully applied to this solution
500 return std::nullopt;
501 }
502
503 Program<T> child(parent.program);
504
505 int attempts = 0;
506 while(++attempts <= 3)
507 {
508 // choose a valid mutation option
509 string choice = r.random_choice(parameters.mutation_probs);
510
511 vector<float> weights;
512
513 // choose location by weighted sampling of program
514 if (choice == "point")
515 weights = PointMutation::find_spots(child.Tree, search_space, parameters);
516 else if (choice == "insert")
517 weights = InsertMutation::find_spots(child.Tree, search_space, parameters);
518 else if (choice == "delete")
519 weights = DeleteMutation::find_spots(child.Tree, search_space, parameters);
520 else if (choice == "subtree")
521 weights = SubtreeMutation::find_spots(child.Tree, search_space, parameters);
522 else if (choice == "toggle_weight_on")
523 weights = ToggleWeightOnMutation::find_spots(child.Tree, search_space, parameters);
524 else if (choice == "toggle_weight_off")
525 weights = ToggleWeightOffMutation::find_spots(child.Tree, search_space, parameters);
526 else {
527 std::string msg = fmt::format("{} not a valid mutation choice", choice);
529 }
530
531 if (std::all_of(weights.begin(), weights.end(), [](const auto& w) {
532 return w<=0.0;
533 }))
534 { // There is no spot that has a probability to be selected
535 continue;
536 }
537
538 // apply the mutation and check if it succeeded
539 auto spot = r.select_randomly(child.Tree.begin(), child.Tree.end(),
540 weights.begin(), weights.end());
541
542 // Every mutation here works inplace, so they return bool instead of
543 // std::optional to indicare the result of their manipulation over the
544 // program tree. Here we call the mutation function and return the result
545
546 bool success;
547 if (choice == "point")
548 success = PointMutation::mutate(child.Tree, spot, search_space, parameters);
549 else if (choice == "insert")
550 success = InsertMutation::mutate(child.Tree, spot, search_space, parameters);
551 else if (choice == "delete")
552 success = DeleteMutation::mutate(child.Tree, spot, search_space, parameters);
553 else if (choice == "subtree")
554 success = SubtreeMutation::mutate(child.Tree, spot, search_space, parameters);
555 else if (choice == "toggle_weight_on")
556 success = ToggleWeightOnMutation::mutate(child.Tree, spot, search_space, parameters);
557 else // it must be"toggle_weight_off"
558 success = ToggleWeightOffMutation::mutate(child.Tree, spot, search_space, parameters);
559
560 // std::cout << "returning" << std::endl;
561 if (success
562 && ( (child.size() <= parameters.max_size)
563 && (child.depth() <= parameters.max_depth) )){
564
566 ind.set_objectives(parent.get_objectives()); // it will have an invalid fitness
567
568 return ind;
569 } else {
570 continue;
571 }
572 }
573
574 return std::nullopt;
575}
576
577template <Brush::ProgramType T>
579 const vector<size_t>& parents)
580{
582
583 for (unsigned i = 0; i<indices.size(); ++i)
584 {
585 if (pop.individuals.at(indices.at(i)) != nullptr)
586 {
587 continue; // skipping if it is an individual
588 }
589
590 // pass check for children undergoing variation
591 std::optional<Individual<T>> opt=std::nullopt; // new individual
592
593 const Individual<T>& mom = pop[
594 *r.select_randomly(parents.begin(), parents.end())];
595
596 vector<Individual<T>> ind_parents;
597 if ( r() < parameters.cx_prob) // crossover
598 {
599 const Individual<T>& dad = pop[
600 *r.select_randomly(parents.begin(), parents.end())];
601
602 opt = cross(mom, dad);
603 ind_parents = {mom, dad};
604 }
605 else // mutation
606 {
607 opt = mutate(mom);
608 ind_parents = {mom};
609 }
610
611 // this assumes that islands do not share indexes before doing variation
612 unsigned id = parameters.current_gen*parameters.pop_size+indices.at(i);
613
614 // mutation and crossover already perform 3 attempts. If it fails, we just fill with a random individual
615 if (opt) // variation worked, lets keep this
616 {
617 Individual<T> ind = opt.value();
618
619 ind.is_fitted_ = false;
620 ind.set_id(id);
621 ind.set_parents(ind_parents);
622
623 assert(ind.program.size()>0);
624 pop.individuals.at(indices.at(i)) = std::make_shared<Individual<T>>(ind);
625 }
626 else { // no optional value was returned
628
629 // creating a new random individual
630 new_ind.init(search_space, parameters);
631 new_ind.set_objectives(mom.get_objectives()); // it will have an invalid fitness
632 new_ind.set_id(id);
633 new_ind.is_fitted_ = false;
634
635 pop.individuals.at(indices.at(i)) = std::make_shared<Individual<T>>(new_ind);
636 }
637 }
638}
639
640} //namespace Var
641} //namespace Brush
void bind_engine(py::module &m, string name)
vector< string > get_objectives() const
Definition individual.h:127
void init(SearchSpace &ss, const Parameters &params)
Definition individual.h:45
Program< T > program
executable data structure
Definition individual.h:17
vector< size_t > get_island_indexes(int island)
Definition population.h:38
vector< std::shared_ptr< Individual< T > > > individuals
Definition population.h:18
T random_choice(const map< T, float > &m)
Definition rnd.h:96
Iter select_randomly(Iter start, Iter end)
Definition rnd.h:61
int rnd_int(int lowerLimit, int upperLimit)
Definition rnd.cpp:68
delete subtree and replace it with a terminal of the same return type
static auto mutate(tree< Node > &Tree, Iter spot, const SearchSpace &SS, const Parameters &params)
insert a node with spot as a child
Definition variation.cpp:39
static auto find_spots(tree< Node > &Tree, const SearchSpace &SS, const Parameters &params)
Definition variation.cpp:41
static auto mutate(tree< Node > &Tree, Iter spot, const SearchSpace &SS, const Parameters &params)
Definition variation.cpp:72
tree< Node >::pre_order_iterator Iter
Definition variation.h:25
replace node with same typed node
Definition variation.cpp:13
static auto mutate(tree< Node > &Tree, Iter spot, const SearchSpace &SS, const Parameters &params)
Definition variation.cpp:15
replaces the subtree rooted in spot
static auto find_spots(tree< Node > &Tree, const SearchSpace &SS, const Parameters &params)
static auto mutate(tree< Node > &Tree, Iter spot, const SearchSpace &SS, const Parameters &params)
toggle the node's weight OFF
static auto find_spots(tree< Node > &Tree, const SearchSpace &SS, const Parameters &params)
static auto mutate(tree< Node > &Tree, Iter spot, const SearchSpace &SS, const Parameters &params)
toggle the node's weight ON
static auto mutate(tree< Node > &Tree, Iter spot, const SearchSpace &SS, const Parameters &params)
static auto find_spots(tree< Node > &Tree, const SearchSpace &SS, const Parameters &params)
std::optional< Individual< T > > mutate(const Individual< T > &parent)
Performs mutation operation on an individual.
void vary(Population< T > &pop, int island, const vector< size_t > &parents)
Handles variation of a population.
std::optional< Individual< T > > cross(const Individual< T > &mom, const Individual< T > &dad)
Performs crossover operation on two individuals.
#define HANDLE_ERROR_THROW(err)
Definition error.h:27
static Rnd & r
Definition rnd.h:174
< nsga2 selection operator for getting the front
Definition data.cpp:12
auto IsWeighable() noexcept -> bool
Definition node.h:46
SearchSpace SS
unsigned get_max_size() const
Definition params.h:135
unsigned int max_depth
Definition params.h:35
unsigned int max_size
Definition params.h:36
An individual program, a.k.a. model.
Definition program.h:50
Holds a search space, consisting of operations and terminals and functions, and methods to sample tha...
Map< Node > node_map
Maps return types to argument types to node types.
std::optional< Node > get_node_like(Node node) const
get a node with a signature matching node
std::optional< Node > sample_op_with_arg(DataType ret, DataType arg, bool terminal_compatible=true, int max_args=0) const
get operator with at least one argument matching arg
std::optional< tree< Node > > sample_subtree(Node root, int max_d, int max_size) const
create a subtree with maximum size and depth restrictions and root of type root_type
std::optional< Node > sample_terminal(bool force_return=false) const
Get a random terminal.