Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
individual.h
Go to the documentation of this file.
1#ifndef INDIVIDUAL_H
2#define INDIVIDUAL_H
3
5#include "fitness.h"
6
7#include <functional>
8
9using namespace nlohmann;
10
11namespace Brush{
12namespace Pop{
13
14template<ProgramType T>
16public: // TODO: make these private (and work with nlohman json)
18
19 // store just the info that we don't have a getter for. size, depth, complexity: they can all be obtained with program.<function here>
20
21 // error is the aggregation of the error vector, and can be user-specified
22
23 // this flag is used to avoid re-fitting an individual. The program's is_fitted_ flag is used to perform checks (like in predict with weights). They are two different things and I think I'll keep it this way (individual is just a container to keep program and fitness together)
24 bool is_fitted_ = false;
25
26 // archive utility (and also keep track of evolution) (this is meaningful only
27 // if variation is done using the vary() function)
28 unsigned id;
29 vector<unsigned> parent_id;
30
31 VectorXf error;
32
34
35 vector<string> objectives;
36
38 {
39 objectives = {"error", "complexity"};
40 id = 0; // unsigned
41 };
42
44
45 void init(SearchSpace& ss, const Parameters& params)
46 {
47 program = ss.make_program<Program<T>>(params, 0, 0);
48
49 // If different from zero, then the program is created with a fixed depth and size.
50 // If zero, it samples the value
51 // program = SS.make_program<T>(params, params.max_depth, params.max_size);
52 };
53
54 // TODO: replace occurrences of program.fit with these (also predict and predict_proba)
55 Individual<T> &fit(const Dataset& data) {
56 program.fit(data);
57 this->is_fitted_ = true;
58 return *this;
59 };
61 {
62 Dataset d(X,y);
63 return fit(d);
64 };
65
66 auto predict(const Dataset& data) { return program.predict(data); };
68 {
69 Dataset d(X);
70 return predict(d);
71 };
72
73 template <ProgramType P = T>
75 auto predict_proba(const Dataset &d) { return program.predict_proba(d); };
76 template <ProgramType P = T>
79 {
80 Dataset d(X);
81 return predict_proba(d);
82 };
83
84 // just getters
85 bool get_is_fitted() const { return this->is_fitted_; };
86 unsigned int get_size() const { return program.size(); };
87 unsigned int get_depth() const { return program.depth(); };
88 unsigned int get_complexity() const { return program.complexity(); };
90
91 string get_model(string fmt="compact", bool pretty=false) {
92 return program.get_model(fmt, pretty); };
93 string get_dot_model(string extras="") {
94 return program.get_dot_model(extras); };
95
97 Fitness& get_fitness() { return fitness; };
98
99 void set_id(unsigned i){id = i;};
100 void set_parents(const vector<Individual<T>>& parents){
101 parent_id.clear();
102 for (const auto& p : parents)
103 parent_id.push_back(p.id);
104 };
105 void set_parents(const vector<unsigned>& parents){ parent_id = parents; };
106
107 // TODO: USE setters and getters instead of accessing it directly
108 // template<ProgramType T>
109 // void Individual<T>::set_objectives(const vector<string>& objectives)
110
111 // Static map for weights associated with strings.
112 // this will determine each fitness metric to be a min/max problem.
113 // generic error metric: by default log and multi_log if it is a
114 // classification problem, and MSE if it is a regression (so its always
115 // a minimization by default, thus "error" has weight -1.0)
/// Weight attached to each supported objective name.
/// A negative weight marks a metric to be minimized, a positive weight one
/// to be maximized. std::map keeps its keys sorted, so the order of the
/// entries below is for readability only.
inline static std::map<std::string, float> weightsMap = {
    // minimized
    {"error",      -1.0f},
    {"mse",        -1.0f},
    {"log",        -1.0f},
    {"multi_log",  -1.0f},
    {"complexity", -1.0f},
    {"size",       -1.0f},
    // maximized
    {"accuracy",                +1.0f},
    {"average_precision_score", +1.0f}
};
126
127 vector<string> get_objectives() const { return objectives; };
128 void set_objectives(vector<string> objs){
130
131 vector<float> weights;
132 weights.resize(0);
133 for (const auto& obj : objectives) {
134 auto it = weightsMap.find(obj);
135 if (it != weightsMap.end()) {
136 weights.push_back(it->second);
137 } else {
138 throw std::runtime_error(
139 "Unknown metric used as fitness. Value was " + obj);
140 }
141 }
142
143 fitness.set_weights(weights);
144 };
145};
146
147
148// serialization for Individual
149template<ProgramType T>
150void to_json(json &j, const Individual<T> &p)
151{
152 j = json{
153 {"program", p.program},
154 {"fitness", p.fitness},
155 {"id", p.id},
156 {"parent_id", p.parent_id},
157 {"objectives", p.objectives}
158 };
159}
160
161template<ProgramType T>
162void from_json(const json &j, Individual<T>& p)
163{// TODO: figure out if this works with private attributes and try to actually make them private (and use getters and setters)
164 j.at("program").get_to( p.program );
165 j.at("fitness").get_to( p.fitness );
166 j.at("id").get_to( p.id );
167 j.at("parent_id").get_to( p.parent_id );
168 j.at("objectives").get_to( p.objectives );
169}
170} // Pop
171} // Brush
172
173#endif
void bind_engine(py::module &m, string name)
holds variable type data.
Definition data.h:51
static std::map< std::string, float > weightsMap
set parent ids using id values
Definition individual.h:116
unsigned int get_complexity() const
Definition individual.h:88
string get_model(string fmt="compact", bool pretty=false)
Definition individual.h:91
vector< string > objectives
objectives for use with Pareto selection
Definition individual.h:35
auto predict(const Dataset &data)
Definition individual.h:66
void set_fitness(Fitness &f)
Definition individual.h:96
auto predict_proba(const Dataset &d)
Definition individual.h:75
Individual< T > & fit(const Dataset &data)
Definition individual.h:55
unsigned int get_size() const
Definition individual.h:86
unsigned id
tracking id
Definition individual.h:28
Program< T > & get_program()
Definition individual.h:89
void set_id(unsigned i)
Definition individual.h:99
Fitness & get_fitness()
Definition individual.h:97
VectorXf error
training error (used in lexicase selectors)
Definition individual.h:31
Fitness fitness
aggregate fitness score
Definition individual.h:33
Individual< T > & fit(const Ref< const ArrayXXf > &X, const Ref< const ArrayXf > &y)
Definition individual.h:60
void set_parents(const vector< unsigned > &parents)
set parent ids using parents
Definition individual.h:105
string get_dot_model(string extras="")
Definition individual.h:93
vector< string > get_objectives() const
Definition individual.h:127
auto predict_proba(const Ref< const ArrayXXf > &X)
Definition individual.h:78
void set_objectives(vector< string > objs)
Definition individual.h:128
void set_parents(const vector< Individual< T > > &parents)
Definition individual.h:100
Individual(Program< T > &prg)
Definition individual.h:43
unsigned int get_depth() const
Definition individual.h:87
vector< unsigned > parent_id
ids of parents
Definition individual.h:29
auto predict(const Ref< const ArrayXXf > &X)
Definition individual.h:67
void init(SearchSpace &ss, const Parameters &params)
Definition individual.h:45
Program< T > program
executable data structure
Definition individual.h:17
bool get_is_fitted() const
Definition individual.h:85
void to_json(json &j, const Individual< T > &p)
Definition individual.h:150
void from_json(const json &j, Individual< T > &p)
Definition individual.h:162
< nsga2 selection operator for getting the front
Definition data.cpp:12
Represents the fitness of an individual in the Brush namespace.
Definition fitness.h:25
void set_weights(vector< float > &w)
Definition fitness.h:89
An individual program, a.k.a. model.
Definition program.h:50
TreeType predict_proba(const Dataset &d)
Definition program.h:212
TreeType predict(const Dataset &d)
the standard predict function. Returns the output of the Tree directly.
Definition program.h:175
int complexity() const
count the complexity of the program.
Definition program.h:93
Program< PType > & fit(const Dataset &d)
Definition program.h:142
int depth() const
count the tree depth of the program. The depth is not influenced by weighted nodes.
Definition program.h:120
string get_dot_model(string extras="") const
Get the model as a dot object.
Definition program.h:344
string get_model(string fmt="compact", bool pretty=false) const
Get the model as a string.
Definition program.h:328
int size(bool include_weight=true) const
count the tree size of the program, including the weights in weighted nodes.
Definition program.h:102
Holds a search space, consisting of operations and terminals and functions, and methods to sample tha...
PT make_program(const Parameters &params, int max_d=0, int max_size=0)
Makes a random program.