Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
individual.h
Go to the documentation of this file.
1#ifndef INDIVIDUAL_H
2#define INDIVIDUAL_H
3
5#include "fitness.h"
6
7#include <functional>
8
9using namespace nlohmann;
10
11namespace Brush{
12namespace Pop{
13
14template<ProgramType T>
16public: // TODO: make these private (and work with nlohman json)
18
19 // store only the info for which we don't have a getter. size, depth, complexity: they can all be obtained with program.<function here>
20
21 // error is the aggregation of the error vector, and can be user specified
22
23 // This flag is used to avoid re-fitting an individual. The program's is_fitted_ flag is used to perform checks (like in predict with weights). They are two different things and I think I'll keep it this way (individual is just a container to keep program and fitness together)
24 bool is_fitted_ = false;
25
26 // archive utility (and also keep track of evolution) (this is meaningful only
27 // if variation is done using the vary() function)
28 unsigned id;
29 vector<unsigned> parent_id;
30
31 // storing what changed in relation to parent inside variation
32 string variation = "born"; // spontanegous generation (born), crossover, or which type of mutation
33 vector<Node> sampled_nodes = {}; // nodes that were sampled in mutation
34
35 VectorXf error;
36
38
39 vector<string> objectives;
40
41
43 {
44 objectives = {"scorer", "linear_complexity"};
45 id = 0; // unsigned
46 };
47
49 program = prg;
50 };
51
52 void init(SearchSpace& ss, const Parameters& params)
53 {
54 program = ss.make_program<Program<T>>(params, 0, 0);
55
56 // overriding the objectives with the ones from params (to replace
57 // the generic "scorer" by the actual scorer set in the params object)
58 objectives = params.get_objectives();
59
60 // If different from zero, then the program is created with a fixed depth and size.
61 // If zero, it samples the value
62 // program = SS.make_program<T>(params, params.max_depth, params.max_size);
63
64 variation = "born";
65 };
66
67 // TODO: replace occurrences of program.fit with these (also predict and predict_proba)
68 Individual<T> &fit(const Dataset& data) {
69 program.fit(data);
70 this->is_fitted_ = true;
71 return *this;
72 };
73 Individual<T> &fit(const Ref<const ArrayXXf>& X, const Ref<const ArrayXf>& y)
74 {
75 Dataset d(X,y);
76 return fit(d);
77 };
78
86 {
87 program.replace_program(new_program);
88 this->is_fitted_ = false;
89 fitness.clearValues();
90 return *this;
91 };
92
100 {
101 Program<T> new_program = j;
102 return replace_program(new_program);
103 };
104
105 auto predict(const Dataset& data) { return program.predict(data); };
106 auto predict(const Ref<const ArrayXXf>& X)
107 {
108 Dataset d(X);
109 return predict(d);
110 };
111
112 template <ProgramType P = T>
113 requires((P == PT::BinaryClassifier) || (P == PT::MulticlassClassifier))
114 auto predict_proba(const Dataset &d) { return program.predict_proba(d); };
115 template <ProgramType P = T>
116 requires((P == PT::BinaryClassifier) || (P == PT::MulticlassClassifier))
117 auto predict_proba(const Ref<const ArrayXXf>& X)
118 {
119 Dataset d(X);
120 return predict_proba(d);
121 };
122
123 // just getters
124 unsigned int get_size() const { return program.size(); };
125 unsigned int get_depth() const { return program.depth(); };
126 unsigned int get_complexity() const { return program.complexity(); };
127 unsigned int get_linear_complexity() const { return program.linear_complexity(); };
129
130 string get_model(string fmt="compact", bool pretty=false) {
131 return program.get_model(fmt, pretty); };
132 string get_dot_model(string extras="") {
133 return program.get_dot_model(extras); };
134
135 void set_fitness(Fitness &f) { fitness=f; };
137
138 void set_variation(string v) { variation=v; };
139 string get_variation() const { return variation; };
140
141 bool get_is_fitted() const { return this->is_fitted_; };
142 void set_is_fitted(bool fitted) { this->is_fitted_ = fitted; };
143
144 void set_sampled_nodes(const vector<Node>& nodes) { sampled_nodes = nodes; };
145 vector<Node> get_sampled_nodes() const { return sampled_nodes; };
146
147 unsigned int get_id(){ return id;};
148 void set_id(unsigned i){id = i;};
149 void set_parents(const vector<Individual<T>>& parents){
150 parent_id.clear();
151 for (const auto& p : parents)
152 parent_id.push_back(p.id);
153 };
154 void set_parents(const vector<unsigned>& parents){ parent_id = parents; };
155
156 // TODO: use setters and getters instead of accessing it directly
157 // template<ProgramType T>
158 // void Individual<T>::set_objectives(const vector<string>& objectives)
159
160 // Static map for weights associated with strings.
161 // this will determine each fitness metric to be a min/max problem.
162 // generic error metric: by default log and multi_log if it is a
163 // classification problem, and MSE if it is a regression (so its always
164 // a minimization by default, thus "scorer" has weight -1.0)
// Weight per objective name, looked up by set_objectives.
// Entries with -1 are minimized; entries with +1 are maximized.
// "scorer" is intentionally left out (kept commented below).
inline static std::map<std::string, float> weightsMap = {
    // minimized
    {"complexity",              -1.0f},
    {"linear_complexity",       -1.0f},
    {"size",                    -1.0f},
    {"mse",                     -1.0f},
    {"log",                     -1.0f},
    {"multi_log",               -1.0f},
    // maximized
    {"average_precision_score", +1.0f},
    {"balanced_accuracy",       +1.0f},
    {"accuracy",                +1.0f}
    // {"scorer", -1.0}
};
177
178 vector<string> get_objectives() const { return objectives; };
179 void set_objectives(vector<string> objs){
180 objectives=objs;
181
182 vector<float> weights;
183 weights.resize(0);
184 for (const auto& obj : objectives) {
185 // TODO: do i need to use find or this can be done directly?
186 auto it = weightsMap.find(obj);
187 if (it != weightsMap.end()) {
188 weights.push_back(it->second);
189 } else {
190 throw std::runtime_error(
191 "Unknown metric used as fitness. Value was " + obj);
192 }
193 }
194
195 fitness.set_weights(weights);
196 };
197};
198
199
200// serialization for Individual
201template<ProgramType T>
202void to_json(json &j, const Individual<T> &p)
203{
204 // error and sampled nodes are not being serialized
205 j = json{
206 {"program", p.program},
207 {"fitness", p.fitness},
208 {"id", p.id},
209 {"parent_id", p.parent_id},
210 {"objectives", p.objectives},
211 {"is_fitted_", p.is_fitted_},
212 {"variation", p.variation}
213 };
214}
215
216template<ProgramType T>
217void from_json(const json &j, Individual<T>& p)
218{// TODO: figure out if this works with private attributes and try to actually make them private (and use getters and setters)
219 j.at("program").get_to( p.program );
220 j.at("fitness").get_to( p.fitness );
221 j.at("id").get_to( p.id );
222 j.at("parent_id").get_to( p.parent_id );
223 j.at("objectives").get_to( p.objectives );
224 j.at("is_fitted_").get_to( p.is_fitted_ );
225 j.at("variation").get_to( p.variation );
226}
227} // Pop
228} // Brush
229
230#endif
holds variable type data.
Definition data.h:51
static std::map< std::string, float > weightsMap
Definition individual.h:165
void set_is_fitted(bool fitted)
Definition individual.h:142
unsigned int get_id()
Definition individual.h:147
unsigned int get_complexity() const
Definition individual.h:126
Individual< T > & replace_program(const Program< T > &new_program)
Replace the current program with a new program, invalidating fitness.
Definition individual.h:85
string get_model(string fmt="compact", bool pretty=false)
Definition individual.h:130
auto predict(const Dataset &data)
Definition individual.h:105
void set_fitness(Fitness &f)
Definition individual.h:135
auto predict_proba(const Dataset &d)
Definition individual.h:114
Individual< T > & fit(const Dataset &data)
Definition individual.h:68
void set_variation(string v)
Definition individual.h:138
void set_sampled_nodes(const vector< Node > &nodes)
Definition individual.h:144
unsigned int get_size() const
Definition individual.h:124
Program< T > & get_program()
Definition individual.h:128
void set_id(unsigned i)
Definition individual.h:148
vector< Node > get_sampled_nodes() const
Definition individual.h:145
Fitness & get_fitness()
Definition individual.h:136
Individual< T > & fit(const Ref< const ArrayXXf > &X, const Ref< const ArrayXf > &y)
Definition individual.h:73
string get_variation() const
Definition individual.h:139
void set_parents(const vector< unsigned > &parents)
set parent ids using parents
Definition individual.h:154
string get_dot_model(string extras="")
Definition individual.h:132
vector< string > get_objectives() const
Definition individual.h:178
auto predict_proba(const Ref< const ArrayXXf > &X)
Definition individual.h:117
void set_objectives(vector< string > objs)
Definition individual.h:179
void set_parents(const vector< Individual< T > > &parents)
Definition individual.h:149
Individual(Program< T > &prg)
Definition individual.h:48
unsigned int get_depth() const
Definition individual.h:125
auto predict(const Ref< const ArrayXXf > &X)
Definition individual.h:106
unsigned int get_linear_complexity() const
Definition individual.h:127
Individual< T > & replace_program(const json &j)
Replace the current program from a JSON representation, invalidating fitness.
Definition individual.h:99
void init(SearchSpace &ss, const Parameters &params)
Definition individual.h:52
bool get_is_fitted() const
Definition individual.h:141
void to_json(json &j, const Individual< T > &p)
Definition individual.h:202
void from_json(const json &j, Individual< T > &p)
Definition individual.h:217
< nsga2 selection operator for getting the front
Definition bandit.cpp:4
Represents the fitness of an individual in the Brush namespace.
Definition fitness.h:25
vector< string > get_objectives() const
Definition params.h:148
An individual program, a.k.a. model.
Definition program.h:50
Holds a search space, consisting of operations and terminals and functions, and methods to sample tha...
PT make_program(const Parameters &params, int max_d=0, int max_size=0)
Makes a random program.