Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
evaluation.cpp
Go to the documentation of this file.
1#include "evaluation.h"
2
3namespace Brush{
4namespace Eval{
5
6
7// fitness of population
8template<ProgramType T>
9void Evaluation<T>::update_fitness(Population<T>& pop,
10 int island,
11 const Dataset& data,
12 const Parameters& params,
13 bool fit,
14 bool validation
15 )
16{
17 auto indices = pop.get_island_indexes(island);
18
19 for (unsigned i = 0; i<indices.size(); ++i)
20 {
21 auto& ind_ptr = pop.individuals.at(indices.at(i));
22
23 // A nullptr individual (e.g. an offspring slot not yet filled) is an error here: throw instead of dereferencing it
24 if (!ind_ptr) {
25 HANDLE_ERROR_THROW(fmt::format(
26 "Evaluation::update_fitness - attempted to update fitness of a nullptr. "
27 "This suggests population integrity was damaged by another manipulation method."));
28
29 }
30
31 Individual<T>& ind = *ind_ptr.get(); // we are modifying it, so operator[] wont work
32
33 if (false) // pass
34 {
37
38 ind.error = MAX_FLT*VectorXf::Ones(data.y.size());
39 }
40 else
41 {
42 // assign weights to individual
43 if (fit && ind.get_is_fitted() == false)
44 {
45 ind.program.fit(data.get_training_data());
46 }
47
48 assign_fit(ind, data, params, validation);
49 }
50 }
51}
52
53// assign loss to program
54template<ProgramType T>
55void Evaluation<T>::assign_fit(Individual<T>& ind, const Dataset& data,
56 const Parameters& params, bool val)
57{
58 VectorXf errors;
59 using PT = ProgramType;
60
61 Dataset train = data.get_training_data();
62 float f = S.score(ind, train, errors, params);
63 ind.error = errors;
64
65 float f_v = f;
66 if (data.use_validation) {
67 Dataset validation = data.get_validation_data();
68
69 // when calculating the validation score, do not let it write into the
70 // errors vector; using a separate vector avoids validation data leakage
71 VectorXf val_errors;
72 f_v = S.score(ind, validation, val_errors, params);
73
74 // if (val) // never use validation data here. This is used in lexicase selection
75 // ind.error = val_errors;
76 }
77
78 float error_weight = Individual<T>::weightsMap[params.scorer];
79 if (std::isnan(f) || std::isinf(f))
80 f = error_weight > 0 ? -MAX_FLT : MAX_FLT;
81 if (std::isnan(f_v) || std::isinf(f_v))
82 f_v = error_weight > 0 ? -MAX_FLT : MAX_FLT;
83
84 // This is what is going to determine the weights for the individual's fitness.
85 // OBS: always use get_objectives, as it will replace the "scorer" string with
86 // the actual scorer function name
87 ind.set_objectives(params.get_objectives());
88
89 // when we use these setters, it updates its previous values references
90 ind.fitness.set_loss(f);
91 ind.fitness.set_loss_v(f_v);
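92 ind.fitness.set_size(ind.get_size());
93 ind.fitness.set_complexity(ind.get_complexity());
94 ind.fitness.set_linear_complexity(ind.get_linear_complexity());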
95 ind.fitness.set_depth(ind.get_depth());
96
97 vector<float> values;
98 values.resize(0);
99
100 // TODO: implement a better way of switching between train and val
101 // without the burden of recalculating everything every time
102 for (const auto& n : ind.get_objectives())
103 {
104 // TODO: this should be done in the fitness class when calling wvalues() or values()
105 if (n.compare(params.scorer)==0)
106 values.push_back(val ? f_v : f);
107 else if (n.compare("complexity")==0)
108 values.push_back(ind.get_complexity());
109 else if (n.compare("linear_complexity")==0)
110 values.push_back(ind.get_linear_complexity());
111 else if (n.compare("size")==0)
112 values.push_back(ind.get_size());
113 else if (n.compare("depth")==0)
114 values.push_back(ind.get_depth());
115 else
116 HANDLE_ERROR_THROW(n+" is not a known objective");
117 }
118
119 // will use inner attributes to set the fitness object
120 ind.fitness.set_values(values);
121}
122
123} // Eval
124} // Brush
holds variable type data.
Definition data.h:51
Dataset get_validation_data() const
Definition data.cpp:215
bool use_validation
Definition data.h:90
ArrayXf y
length N array, the target label
Definition data.h:82
Dataset get_training_data() const
Definition data.cpp:214
void update_fitness(Population< T > &pop, int island, const Dataset &data, const Parameters &params, bool fit=true, bool validation=true)
Update the fitness of individuals in a population.
Definition evaluation.cpp:9
void assign_fit(Individual< T > &ind, const Dataset &data, const Parameters &params, bool val=false)
Assign fitness to an individual.
static std::map< std::string, float > weightsMap
set parent ids using id values
Definition individual.h:165
unsigned int get_complexity() const
Definition individual.h:126
unsigned int get_size() const
Definition individual.h:124
VectorXf error
training error (used in lexicase selectors)
Definition individual.h:35
Fitness fitness
aggregate fitness score
Definition individual.h:37
vector< string > get_objectives() const
Definition individual.h:178
void set_objectives(vector< string > objs)
Definition individual.h:179
unsigned int get_depth() const
Definition individual.h:125
unsigned int get_linear_complexity() const
Definition individual.h:127
Program< T > program
executable data structure
Definition individual.h:17
bool get_is_fitted() const
Definition individual.h:141
vector< size_t > get_island_indexes(int island)
Definition population.h:39
vector< std::shared_ptr< Individual< T > > > individuals
Definition population.h:19
#define HANDLE_ERROR_THROW(err)
Definition error.h:27
static float MAX_FLT
Definition init.h:61
< nsga2 selection operator for getting the front
Definition bandit.cpp:4
ProgramType PT
Definition program.h:40
ProgramType
Definition types.h:70
Namespace containing scoring functions for evaluation metrics.
void set_linear_complexity(unsigned int new_lc)
Definition fitness.h:80
void set_complexity(unsigned int new_c)
Definition fitness.h:75
void set_loss_v(float f_v)
Definition fitness.h:67
void set_depth(unsigned int new_d)
Definition fitness.h:85
void set_values(vector< float > &v)
Definition fitness.h:129
void set_size(unsigned int new_s)
Definition fitness.h:71
void set_loss(float f)
Definition fitness.h:63
vector< string > get_objectives() const
Definition params.h:148
string scorer
actual loss function used, determined by error
Definition params.h:66
Program< PType > & fit(const Dataset &d)
Definition program.h:151