Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
scorer.h
Go to the documentation of this file.
1#ifndef SCORER_H
2#define SCORER_H
3
4#include "metrics.h"
5#include "../util/error.h"
6#include "../types.h"
7
8// code to evaluate GP programs.
9namespace Brush{
10
11using namespace Pop;
12
13namespace Eval{
14
15
16template <ProgramType P>
17class Scorer
18{
19
20using RetType =
21 typename std::conditional_t<P == PT::Regressor, ArrayXf,
22 std::conditional_t<P == PT::Representer, ArrayXXf, ArrayXf
23 >>;
24
25typedef float (*funcPointer)(const VectorXf&,
26 const VectorXf&,
27 VectorXf&,
28 const vector<float>&);
29public:
30 // map the string into a function to be called when calculating the score
31 std::map<string, funcPointer> score_hash;
32 string scorer;
33
34 // TODO: add more scores, include them here, add to score_hash
35 Scorer(string scorer="mse") {
36 score_hash["mse"] = &mse;
37
38 this->set_scorer(scorer);
39 };
40
41 void set_scorer(string scorer){ this->scorer = scorer; };
42 string get_scorer(){return this->scorer; };
43
44 /* void set_scorer(string scorer); */
45 float score(const VectorXf& y_true, const VectorXf& y_pred,
46 VectorXf& loss, const vector<float>& w)
47 {
48 // loss is an array passed by reference to store each prediction (used in lexicase)
49 // weights are used to give more or less importance for a given sample.
50 // Every scorer must have the same function signature, but arent required to use all info
51
52 if ( score_hash.find(this->scorer) == score_hash.end() )
53 {
54 HANDLE_ERROR_THROW("Scoring function '" + this->scorer + "' not defined");
55 return 0.0;
56 }
57 else
58 {
59 return score_hash.at(this->scorer)(y_true, y_pred, loss, w);
60 }
61 };
62
63 float score(Individual<P>& ind, Dataset& data,
64 VectorXf& loss, const Parameters& params)
65 {
66 RetType y_pred = ind.predict(data);
67 return score(data.y, y_pred, loss, params.class_weights);
68 }
69};
70
71
72// TODO: improve this so we dont have a lot of different declarations
73template <ProgramType P>
74 requires( P == PT::BinaryClassifier)
76{
77
78using RetType = ArrayXf;
79
80typedef float (*funcPointer)(const VectorXf&,
81 const VectorXf&,
82 VectorXf&,
83 const vector<float>&);
84public:
85 // map the string into a function to be called when calculating the score
86 std::map<string, funcPointer> score_hash;
87 string scorer;
88
89 Scorer(string scorer="log") {
90 score_hash["log"] = &mean_log_loss;
91 score_hash["average_precision_score"] = &average_precision_score;
92 score_hash["accuracy"] = &zero_one_loss;
93 score_hash["balanced_accuracy"] = &bal_zero_one_loss;
94
95 this->set_scorer(scorer);
96 };
97
98 void set_scorer(string scorer){ this->scorer = scorer; };
99 string get_scorer(){return this->scorer; };
100
101 /* void set_scorer(string scorer); */
102 float score(const VectorXf& y_true, const VectorXf& y_pred,
103 VectorXf& loss, const vector<float>& w)
104 {
105 if ( score_hash.find(this->scorer) == score_hash.end() )
106 {
107 // not found
108 HANDLE_ERROR_THROW("Scoring function '" + this->scorer
109 + "' not defined");
110 return 0.0;
111 }
112 else
113 {
114 // found
115 return score_hash.at(this->scorer)(y_true, y_pred, loss, w);
116 }
117 };
118
119 float score(Individual<P>& ind, Dataset& data,
120 VectorXf& loss, const Parameters& params)
121 {
122 RetType y_pred = ind.predict_proba(data); // .template cast<float>();
123
124 vector<float> class_weights = {};
125
126 // calculate class weights based on current data --- instead of using a pre-calculated value.
127 // This is only true for the scoring. For other usages of class weights, then
128 // the training data is used.
129 if (params.class_weights_type == "support")
130 {
131 class_weights.resize(params.n_classes);
132 for (unsigned i = 0; i < params.n_classes; ++i){
133 // weighting by support
134 int support = (data.y.cast<int>().array() == i).count();
135
136 if (support==0)
137 class_weights.at(i) = 0.0f;
138 else
139 class_weights.at(i) = float(data.y.size()) / float(params.n_classes * support);
140 }
141 }
142 else // else it is either unbalanced or user_defined
143 {
144 // if unbalanced, class_weights is empty. if user_defined,
145 // then we should use the provided values anyways
146 class_weights = params.class_weights;
147 }
148
149 return score(data.y, y_pred, loss, class_weights);
150 }
151};
152
153template <ProgramType P>
154 requires(P == PT::MulticlassClassifier)
155class Scorer<P>
156{
157
158using RetType = ArrayXXf;
159
160typedef float (*funcPointer)(const VectorXf&,
161 const ArrayXXf&,
162 VectorXf&,
163 const vector<float>&);
164public:
165 // map the string into a function to be called when calculating the score
166 std::map<string, funcPointer> score_hash;
167 string scorer;
168
169 // TODO: I need to test this stuff
170 Scorer(string scorer="multi_log") {
171 score_hash["multi_log"] = &mean_multi_log_loss;
172 score_hash["accuracy"] = &multi_zero_one_loss;
173
174 this->set_scorer(scorer);
175 };
176
177 void set_scorer(string scorer){ this->scorer = scorer; };
178 string get_scorer(){return this->scorer; };
179
180 /* void set_scorer(string scorer); */
181 float score(const VectorXf& y_true, const ArrayXXf& y_pred,
182 VectorXf& loss, const vector<float>& w)
183 {
184 // loss is an array passed by reference to store each prediction (used in lexicase)
185 // weights are used to give more or less importance for a given sample.
186 // Every scorer must have the same function signature, but arent required to use all info
187
188 if ( score_hash.find(this->scorer) == score_hash.end() )
189 {
190 // not found
191 HANDLE_ERROR_THROW("Scoring function '" + this->scorer
192 + "' not defined");
193 return 0.0;
194 }
195 else
196 {
197 // found
198 return score_hash.at(this->scorer)(y_true, y_pred, loss, w);
199 }
200 };
201
202 float score(Individual<P>& ind, Dataset& data,
203 VectorXf& loss, const Parameters& params)
204 {
205 RetType y_pred = ind.predict_proba(data); // .template cast<float>();
206
207 vector<float> class_weights = {};
208 if (params.class_weights_type == "support")
209 {
210 class_weights.resize(params.n_classes);
211 for (unsigned i = 0; i < params.n_classes; ++i){
212 // weighting by support
213 int support = (data.y.cast<int>().array() == i).count();
214
215 if (support==0)
216 class_weights.at(i) = 0.0;
217 else
218 class_weights.at(i) = float(data.y.size()) / float(params.n_classes * support);
219 }
220 } // or else it is either unbalanced or user_defined
221 else {
222 class_weights = params.class_weights;
223 }
224
225 return score(data.y, y_pred, loss, class_weights);
226 }
227};
228
229}
230}
231#endif
void set_scorer(string scorer)
Definition scorer.h:98
float score(Individual< P > &ind, Dataset &data, VectorXf &loss, const Parameters &params)
Definition scorer.h:119
float score(const VectorXf &y_true, const ArrayXXf &y_pred, VectorXf &loss, const vector< float > &w)
Definition scorer.h:181
Scorer(string scorer="multi_log")
Definition scorer.h:170
Scorer(string scorer="log")
Definition scorer.h:89
std::map< string, funcPointer > score_hash
Definition scorer.h:86
float score(const VectorXf &y_true, const VectorXf &y_pred, VectorXf &loss, const vector< float > &w)
Definition scorer.h:102
float(* funcPointer)(const VectorXf &, const VectorXf &, VectorXf &, const vector< float > &)
Definition scorer.h:80
string get_scorer()
Definition scorer.h:42
void set_scorer(string scorer)
Definition scorer.h:41
float score(Individual< P > &ind, Dataset &data, VectorXf &loss, const Parameters &params)
Definition scorer.h:63
typename std::conditional_t< P==PT::Regressor, ArrayXf, std::conditional_t< P==PT::Representer, ArrayXXf, ArrayXf > > RetType
Definition scorer.h:20
Scorer(string scorer="mse")
Definition scorer.h:35
std::map< string, funcPointer > score_hash
Definition scorer.h:31
float score(const VectorXf &y_true, const VectorXf &y_pred, VectorXf &loss, const vector< float > &w)
Definition scorer.h:45
float(* funcPointer)(const VectorXf &, const VectorXf &, VectorXf &, const vector< float > &)
Definition scorer.h:25
auto predict(const Dataset &data)
Definition individual.h:105
auto predict_proba(const Dataset &d)
Definition individual.h:114
holds variable type data.
Definition data.h:51
ArrayXf y
length N array, the target label
Definition data.h:82
#define HANDLE_ERROR_THROW(err)
Definition error.h:27
float multi_zero_one_loss(const VectorXf &y, const ArrayXXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Accuracy for multi-classification.
Definition metrics.cpp:294
float zero_one_loss(const VectorXf &y, const VectorXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Accuracy for binary classification.
Definition metrics.cpp:73
float mean_log_loss(const VectorXf &y, const VectorXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
log loss
Definition metrics.cpp:45
float mean_multi_log_loss(const VectorXf &y, const ArrayXXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Calculates the mean multinomial log loss between the predicted probabilities and the true labels.
Definition metrics.cpp:285
float average_precision_score(const VectorXf &y, const VectorXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Calculates the average precision score between the predicted probabilities and the true labels.
Definition metrics.cpp:132
float mse(const VectorXf &y, const VectorXf &yhat, VectorXf &loss, const vector< float > &class_weights)
mean squared error
Definition metrics.cpp:9
float bal_zero_one_loss(const VectorXf &y, const VectorXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Balanced accuracy for binary classification.
Definition metrics.cpp:100
nsga2 selection operator for getting the front
Definition bandit.cpp:4
vector< float > class_weights
weights for each class
Definition params.h:69
unsigned int n_classes
Definition params.h:76
string class_weights_type
Definition params.h:71