Brush C++ API
A flexible interpretable machine learning framework
metrics.cpp
Go to the documentation of this file.
1#include "metrics.h"
2
3namespace Brush {
4namespace Eval {
5
6/* Scoring functions */
7
9float mse(const VectorXf& y, const VectorXf& yhat, VectorXf& loss,
10 const vector<float>& class_weights)
11{
12 loss = (yhat - y).array().pow(2);
13 return loss.mean();
14}
15
16
17VectorXf log_loss(const VectorXf& y, const VectorXf& predict_proba,
18 const vector<float>& class_weights)
19{
20    // See the comments on weight_optimizer to learn more about why this value of eps is used.
21    // TL;DR: don't change it; changing it can cause weird behaviour.
22 float eps = 1e-6f;
23
24 VectorXf loss;
25
26 loss.resize(y.rows());
27 for (unsigned i = 0; i < y.rows(); ++i)
28 {
29 if (predict_proba(i) < eps || 1 - predict_proba(i) < eps)
30 // clip probabilities since log loss is undefined for predict_proba=0 or predict_proba=1
31 loss(i) = -(y(i)*log(eps) + (1-y(i))*log(1-eps));
32 else
33 loss(i) = -(y(i)*log(predict_proba(i)) + (1-y(i))*log(1-predict_proba(i)));
34
35        if (loss(i) < 0)
36            throw std::runtime_error("loss(i) = " + to_string(loss(i))
37                + ". y = " + to_string(y(i)) + ", predict_proba(i) = "
38                + to_string(predict_proba(i)));
39 }
40
41 return loss;
42}
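// Worked example of the clipping above (illustrative numbers): for y = 1 and
// predict_proba = 0, the unclipped loss would be -log(0) = inf; with eps = 1e-6
// the clipped loss is -log(1e-6) ~= 13.8, so the loss stays finite near 0 and 1.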
43
45float mean_log_loss(const VectorXf& y,
46 const VectorXf& predict_proba, VectorXf& loss,
47 const vector<float>& class_weights)
48{
49 loss = log_loss(y,predict_proba,class_weights);
50
51 if (!class_weights.empty())
52 {
53 float sum_weights = 0;
54
55        // keep `loss` itself unweighted, since weighting the per-sample values could affect lexicase selection
56 VectorXf weighted_loss;
57 weighted_loss.resize(y.rows());
58 for (unsigned i = 0; i < y.rows(); ++i)
59 {
60 weighted_loss(i) = loss(i) * class_weights.at(y(i));
61 sum_weights += class_weights.at(y(i));
62 }
63
64 // equivalent of sklearn's log_loss with weights. It uses np.average,
65 // which returns avg = sum(a * weights) / sum(weights)
66 return weighted_loss.sum() / sum_weights; // normalize weight contributions
67 }
68
69 return loss.mean();
70}
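// Worked example of the weighted average above (illustrative numbers): with per-sample
// loss = {0.2, 0.4}, y = {0, 1} and class_weights = {0.5, 2.0}, the weighted mean is
//   (0.2*0.5 + 0.4*2.0) / (0.5 + 2.0) = 0.9 / 2.5 = 0.36,
// whereas the unweighted mean would be 0.3.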
71
72// accuracy
73float zero_one_loss(const VectorXf& y,
74 const VectorXf& predict_proba, VectorXf& loss,
75 const vector<float>& class_weights )
76{
77 VectorXi yhat = (predict_proba.array() > 0.5).cast<int>();
78
79 // we are actually finding wrong predictions here
80 loss = (yhat.array() != y.cast<int>().array()).cast<float>();
81
82 // Apply class weights if provided
83 float scale = 0.0f;
84 if (!class_weights.empty()) {
85 for (int i = 0; i < y.rows(); ++i) {
86 loss(i) *= class_weights.at(y(i));
87 scale += class_weights.at(y(i));
88 }
89 }
90 else
91 {
92 scale = static_cast<float>(loss.size());
93 }
94
95 // since `loss` contains wrong predictions, we need to invert it
96 return 1.0 - (loss.sum() / scale);
97}
98
99// balanced accuracy
100float bal_zero_one_loss(const VectorXf& y,
101 const VectorXf& predict_proba, VectorXf& loss,
102 const vector<float>& class_weights )
103{
104 VectorXi yhat = (predict_proba.array() > 0.5).cast<int>();
105
106 loss = (yhat.array() != y.cast<int>().array()).cast<float>();
107
108 float TP = 0;
109 float FP = 0;
110 float TN = 0;
111 float FN = 0;
112
113 int num_instances = y.rows();
114 for (int i = 0; i < num_instances; ++i) {
115 float weight = 1.0f; // it is a balanced metric; ignoring class weights
116 // float weight = class_weights.empty() ? 1.0f : class_weights.at(y(i));
117
118 if (yhat(i) == 1.0 && y(i) == 1.0) TP += weight;
119 else if (yhat(i) == 1.0 && y(i) == 0.0) FP += weight;
120 else if (yhat(i) == 0.0 && y(i) == 0.0) TN += weight;
121 else FN += weight;
122 }
123
124 float eps = 1e-6f;
125
126 float TPR = (TP + eps) / (TP + FN + eps);
127 float TNR = (TN + eps) / (TN + FP + eps);
128
129 return (TPR + TNR) / 2.0;
130}
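// Worked example (illustrative counts, ignoring eps): TP = 8, FN = 2, TN = 5, FP = 5
// gives TPR = 8/10 = 0.8 and TNR = 5/10 = 0.5, so balanced accuracy = (0.8 + 0.5)/2 = 0.65.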
131
132float average_precision_score(const VectorXf& y, const VectorXf& predict_proba,
133 VectorXf& loss,
134 const vector<float>& class_weights) {
135
136    // AP is implemented as the area under the precision-recall curve (AUC-PR) in sklearn.
137    // It summarizes the precision-recall curve as a weighted mean of the precisions achieved
138    // at each threshold, with the increase in recall from the previous threshold used as the weight.
139
140 // Assuming y contains binary labels (0 or 1)
141 int num_instances = y.size();
142
143 float eps = 1e-6f; // first we set the loss vector values
144 loss.resize(num_instances);
145 for (int i = 0; i < num_instances; ++i) {
146 float p = predict_proba(i);
147
148        // The loss vector is used in lexicase selection, so it needs a value that is
149        // meaningful at the per-sample level; we use the log loss here.
150 if (p < eps || 1 - p < eps)
151 loss(i) = -(y(i)*log(eps) + (1-y(i))*log(1-eps));
152 else
153 loss(i) = -(y(i)*log(p) + (1-y(i))*log(1-p));
154 }
155
156 // get argsort of predict proba (descending)
157 vector<int> order(num_instances);
158 iota(order.begin(), order.end(), 0);
159 stable_sort(order.begin(), order.end(), [&](int i, int j) {
160 return predict_proba(i) > predict_proba(j); // descending
161 });
162
163 float ysum = 0.0f;
164 vector<float> y_sorted(num_instances); // y true
165 vector<float> p_sorted(num_instances); // pred probas
166 vector<float> w_sorted(num_instances); // sample weights
167 for (int i = 0; i < num_instances; ++i) {
168 int idx = order[i];
169
170 y_sorted[i] = y(idx);
171 p_sorted[i] = predict_proba(idx);
172 w_sorted[i] = class_weights.empty() ? 1.0f : class_weights.at(y_sorted[i]);
173
174 ysum += y_sorted[i] * w_sorted[i];
175 }
176
177    // When scores are tied, the sort order among them is arbitrary, so integrating
178    // sample-by-sample would turn the PR curve into an arbitrary staircase (e.g. when all
179    // scores are equal). sklearn avoids this by treating tied scores as a single threshold.
180
181 // Find the indexes where prediction changes, so we can treat it as one block
182 vector<int> unique_indices = {};
183    set<float> unique_probas = {}; // keep track of unique predicted probabilities
184
185    for (int i = 0; i < num_instances; ++i)
186 if (unique_probas.insert(p_sorted.at(i)).second)
187 unique_indices.push_back(i);
188    unique_indices.push_back(num_instances); // sentinel: one past the last index
189
190 float tp = 0.0f;
191 float fp = 0.0f;
192 vector<float> precision = {1.0};
193 vector<float> recall = {0.0};
194
195 for (size_t i = 0; i < unique_indices.size() - 1; ++i) {
196 int start = unique_indices[i];
197 int end = unique_indices[i+1];
198
199 // process group with a for loop (aggregating for each sample)
200 for (int j = start; j < end; ++j) {
201 tp += y_sorted.at(j) * w_sorted.at(j);
202 fp += (1.0f - y_sorted.at(j)) * w_sorted.at(j);
203 }
204
205 float relevant = tp + fp;
206 precision.push_back(relevant == 0.0f ? 0.0f : tp / relevant);
207 recall.push_back(ysum == 0.0f ? 1.0f : tp / ysum);
208 }
209
210 // integrate PR curve
211 float average_precision = 0.0f;
212 for (size_t i = 0; i < precision.size() - 1; ++i) {
213 average_precision += (recall[i+1] - recall[i]) * precision[i+1];
214 }
215
216 return average_precision;
217}
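// Worked example of the integration above (illustrative values, distinct scores, unit
// weights): with y sorted by descending score = {1, 0, 1, 1}, the (precision, recall)
// points after the initial (1, 0) are (1.00, 1/3), (0.50, 1/3), (0.67, 2/3), (0.75, 1),
// and AP = (1/3)*1.00 + 0*0.50 + (1/3)*0.67 + (1/3)*0.75 ~= 0.81.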
218
219// multinomial log loss
220VectorXf multi_log_loss(const VectorXf& y, const ArrayXXf& predict_proba,
221 const vector<float>& class_weights)
222{
223 // TODO: fix softmax and multiclassification, then implement this
224 VectorXf loss = VectorXf::Zero(y.rows());
225
226 // TODO: needs to be the index of unique elements
227 // get class labels
228 // vector<float> uc = unique( ArrayXi(y.cast<int>()) );
229
230 // float eps = 1e-6f;
231 // float sum_weights = 0;
232 // for (unsigned i = 0; i < y.rows(); ++i)
233 // {
234 // for (const auto& c : uc)
235 // {
236 // // for specific class
237 // ArrayXf yhat = predict_proba.col(int(c));
238
239
240 // /* float yi = y(i) == c ? 1.0 : 0.0 ; */
241
242 // if (y(i) == c)
243 // {
244 // if (yhat(i) < eps || 1 - yhat(i) < eps)
245 // {
246 // // clip probabilities since log loss is undefined for yhat=0 or yhat=1
247 // loss(i) += -log(eps);
248 // }
249 // else
250 // {
251 // loss(i) += -log(yhat(i));
252 // }
253
254 // }
255
256 // }
257 // if (!class_weights.empty()){
258
259 // loss(i) = loss(i)*class_weights.at(y(i));
260 // sum_weights += class_weights.at(y(i));
261 // }
262 // }
263 // if (sum_weights > 0)
264 // loss = loss.array() / sum_weights * y.size();
265
266
267
268 return loss;
269}
270
271float mean_multi_log_loss(const VectorXf& y,
272 const ArrayXXf& predict_proba, VectorXf& loss,
273 const vector<float>& class_weights)
274{
275 loss = multi_log_loss(y, predict_proba, class_weights);
276
277
278
279 return loss.mean();
280}
281
282float multi_zero_one_loss(const VectorXf& y,
283 const ArrayXXf& predict_proba, VectorXf& loss,
284 const vector<float>& class_weights )
285{
286 // TODO: implement this
287 // vector<float> uc = unique(y);
288 // vector<int> c;
289 // for (const auto& i : uc)
290 // c.push_back(int(i));
291
292 // // sensitivity (TP) and specificity (TN)
293 // vector<float> TP(c.size(),0.0), TN(c.size(), 0.0), P(c.size(),0.0), N(c.size(),0.0);
294 // ArrayXf class_accuracies(c.size());
295
296 // // get class counts
297
298 // for (unsigned i=0; i< c.size(); ++i)
299 // {
300 // P.at(i) = (y.array().cast<int>() == c.at(i)).count(); // total positives for this class
301 // N.at(i) = (y.array().cast<int>() != c.at(i)).count(); // total negatives for this class
302 // }
303
304
305 // for (unsigned i = 0; i < y.rows(); ++i)
306 // {
307 // if (yhat(i) == y(i)) // true positive
308 // ++TP.at(y(i) == -1 ? 0 : y(i)); // if-then ? accounts for -1 class encoding
309
310 // for (unsigned j = 0; j < c.size(); ++j)
311 // if ( y(i) !=c.at(j) && yhat(i) != c.at(j) ) // true negative
312 // ++TN.at(j);
313
314 // }
315
316 // // class-wise accuracy = 1/2 ( true positive rate + true negative rate)
317 // for (unsigned i=0; i< c.size(); ++i){
318 // class_accuracies(i) = (TP.at(i)/P.at(i) + TN.at(i)/N.at(i))/2;
319
320
321
322 // }
323
324 // // set loss vectors if third argument supplied
325 // loss = (yhat.cast<int>().array() != y.cast<int>().array()).cast<float>();
326
327 // return 1.0 - class_accuracies.mean();
328
329 return 0.0;
330}
331
332} // Eval
333} // Brush
float multi_zero_one_loss(const VectorXf &y, const ArrayXXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Accuracy for multiclass classification.
Definition metrics.cpp:282
float zero_one_loss(const VectorXf &y, const VectorXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Accuracy for binary classification.
Definition metrics.cpp:73
float mean_log_loss(const VectorXf &y, const VectorXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Mean log loss for binary classification.
Definition metrics.cpp:45
float mean_multi_log_loss(const VectorXf &y, const ArrayXXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Calculates the mean multinomial log loss between the predicted probabilities and the true labels.
Definition metrics.cpp:271
float average_precision_score(const VectorXf &y, const VectorXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Calculates the average precision score between the predicted probabilities and the true labels.
Definition metrics.cpp:132
float mse(const VectorXf &y, const VectorXf &yhat, VectorXf &loss, const vector< float > &class_weights)
Mean squared error.
Definition metrics.cpp:9
VectorXf multi_log_loss(const VectorXf &y, const ArrayXXf &predict_proba, const vector< float > &class_weights)
Calculates the multinomial log loss between the predicted probabilities and the true labels.
Definition metrics.cpp:220
VectorXf log_loss(const VectorXf &y, const VectorXf &predict_proba, const vector< float > &class_weights)
Calculates the log loss between the predicted probabilities and the true labels.
Definition metrics.cpp:17
float bal_zero_one_loss(const VectorXf &y, const VectorXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Balanced accuracy for binary classification.
Definition metrics.cpp:100
string to_string(const T &value)
template function to convert objects to string for logging
Definition utils.h:369
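A minimal usage sketch of the binary scorers in this file. It assumes VectorXf is the Eigen vector type used throughout Brush and that the program is compiled and linked against the Brush library; the data values are purely illustrative:

#include "metrics.h"
#include <Eigen/Dense>
#include <iostream>

int main()
{
    Eigen::VectorXf y(4), proba(4), loss;
    y     << 1.f, 0.f, 1.f, 1.f;     // true binary labels
    proba << 0.9f, 0.4f, 0.6f, 0.8f; // predicted probabilities of class 1

    // each scorer fills `loss` with the per-sample loss and returns the aggregate score
    float bce = Brush::Eval::mean_log_loss(y, proba, loss, {});
    float acc = Brush::Eval::zero_one_loss(y, proba, loss, {});
    float ap  = Brush::Eval::average_precision_score(y, proba, loss, {});

    // class weights are indexed by label: {weight of class 0, weight of class 1}
    float wbce = Brush::Eval::mean_log_loss(y, proba, loss, {0.5f, 2.0f});

    std::cout << "log loss: " << bce << ", accuracy: " << acc
              << ", AP: " << ap << ", weighted log loss: " << wbce << "\n";
    return 0;
}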