Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
metrics.cpp
Go to the documentation of this file.
1#include "metrics.h"
2
3namespace Brush {
4namespace Eval {
5
6/* Scoring functions */
7
9float mse(const VectorXf& y, const VectorXf& yhat, VectorXf& loss,
10 const vector<float>& class_weights)
11{
12 loss = (yhat - y).array().pow(2);
13 return loss.mean();
14}
15
16
17VectorXf log_loss(const VectorXf& y, const VectorXf& predict_proba,
18 const vector<float>& class_weights)
19{
20 // See comments on weight_optimizer to learn more about why am I using
21 // this value for eps. TL;DR: dont change, can cause weird behaviour
22 float eps = 1e-6f;
23
24 VectorXf loss;
25
26 loss.resize(y.rows());
27 for (unsigned i = 0; i < y.rows(); ++i)
28 {
29 if (predict_proba(i) < eps || 1 - predict_proba(i) < eps)
30 // clip probabilities since log loss is undefined for predict_proba=0 or predict_proba=1
31 loss(i) = -(y(i)*log(eps) + (1-y(i))*log(1-eps));
32 else
33 loss(i) = -(y(i)*log(predict_proba(i)) + (1-y(i))*log(1-predict_proba(i)));
34
35 if (loss(i)<0)
36 std::runtime_error("loss(i)= " + to_string(loss(i))
37 + ". y = " + to_string(y(i)) + ", predict_proba(i) = "
38 + to_string(predict_proba(i)));
39 }
40
41 return loss;
42}
43
45float mean_log_loss(const VectorXf& y,
46 const VectorXf& predict_proba, VectorXf& loss,
47 const vector<float>& class_weights)
48{
49 loss = log_loss(y,predict_proba,class_weights);
50
51 if (!class_weights.empty())
52 {
53 float sum_weights = 0;
54
55 // we keep loss without weights, as this may affect lexicase
56 VectorXf weighted_loss;
57 weighted_loss.resize(y.rows());
58 for (unsigned i = 0; i < y.rows(); ++i)
59 {
60 weighted_loss(i) = loss(i) * class_weights.at(y(i));
61 sum_weights += class_weights.at(y(i));
62 }
63
64 // equivalent of sklearn's log_loss with weights. It uses np.average,
65 // which returns avg = sum(a * weights) / sum(weights)
66 return weighted_loss.sum() / sum_weights; // normalize weight contributions
67 }
68
69 return loss.mean();
70}
71
72// accuracy
73float zero_one_loss(const VectorXf& y,
74 const VectorXf& predict_proba, VectorXf& loss,
75 const vector<float>& class_weights )
76{
77 VectorXi yhat = (predict_proba.array() > 0.5).cast<int>();
78
79 // we are actually finding wrong predictions here
80 loss = (yhat.array() != y.cast<int>().array()).cast<float>();
81
82 // Apply class weights if provided
83 float scale = 0.0f;
84 if (!class_weights.empty()) {
85 for (int i = 0; i < y.rows(); ++i) {
86 loss(i) *= class_weights.at(y(i));
87 scale += class_weights.at(y(i));
88 }
89 }
90 else
91 {
92 scale = static_cast<float>(loss.size());
93 }
94
95 // since `loss` contains wrong predictions, we need to invert it
96 return 1.0 - (loss.sum() / scale);
97}
98
99// balanced accuracy
100float bal_zero_one_loss(const VectorXf& y,
101 const VectorXf& predict_proba, VectorXf& loss,
102 const vector<float>& class_weights )
103{
104 VectorXi yhat = (predict_proba.array() > 0.5).cast<int>();
105
106 loss = (yhat.array() != y.cast<int>().array()).cast<float>();
107
108 float TP = 0;
109 float FP = 0;
110 float TN = 0;
111 float FN = 0;
112
113 int num_instances = y.rows();
114 for (int i = 0; i < num_instances; ++i) {
115 float weight = 1.0f; // it is a balanced metric; ignoring class weights
116 // float weight = class_weights.empty() ? 1.0f : class_weights.at(y(i));
117
118 if (yhat(i) == 1.0 && y(i) == 1.0) TP += weight;
119 else if (yhat(i) == 1.0 && y(i) == 0.0) FP += weight;
120 else if (yhat(i) == 0.0 && y(i) == 0.0) TN += weight;
121 else FN += weight;
122 }
123
124 float eps = 1e-6f;
125
126 float TPR = (TP + eps) / (TP + FN + eps);
127 float TNR = (TN + eps) / (TN + FP + eps);
128
129 return (TPR + TNR) / 2.0;
130}
131
132float average_precision_score(const VectorXf& y, const VectorXf& predict_proba,
133 VectorXf& loss,
134 const vector<float>& class_weights) {
135
136 // AP is implemented as AUC PR in sklearn.
137 // AP summarizes a precision-recall curve as the weighted mean of precisions
138 // achieved at each threshold, with the increase in recall from the previous threshold used as the weight
139
140 // Assuming y contains binary labels (0 or 1)
141 int num_instances = y.size();
142
143 float eps = 1e-4f; // first we set the loss vector values
144 loss.resize(num_instances);
145 for (int i = 0; i < num_instances; ++i) {
146 float p = predict_proba(i);
147
148 // The loss vector is used in lexicase selection. we need to set something useful here
149 // that does make sense on individual level. Using log loss here.
150 if (p < eps || 1 - p < eps)
151 loss(i) = -(y(i)*log(eps) + (1-y(i))*log(1-eps));
152 else
153 loss(i) = -(y(i)*log(p) + (1-y(i))*log(1-p));
154 }
155
156 // get argsort of predict proba (descending)
157 vector<int> order(num_instances);
158 iota(order.begin(), order.end(), 0);
159 stable_sort(order.begin(), order.end(), [&](int i, int j) {
160 return predict_proba(i) > predict_proba(j); // descending
161 });
162
163 float ysum = 0.0f;
164 vector<float> y_sorted(num_instances); // y true
165 vector<float> p_sorted(num_instances); // pred probas
166 vector<float> w_sorted(num_instances); // sample weights
167 for (int i = 0; i < num_instances; ++i) {
168 int idx = order[i];
169
170 y_sorted[i] = y(idx);
171 p_sorted[i] = predict_proba(idx);
172 w_sorted[i] = class_weights.empty() ? 1.0f : class_weights.at(y(idx));
173
174 ysum += y_sorted[i] * w_sorted[i];
175 }
176
177 // when all scores are the same, the sort order is arbitrary, so the PR curve
178 // you integrate is a staircase instead of a flat line. Sklearn avoids this by
179 // treating ties as one threshold.
180 // however, this does not produce consistent results, so we will handle flat
181 // lines below
182
183 // detect constant prediction case (all p_sorted equal within tolerance).
184 // because p_sorted is sorted, the first element is the maximum, and the last is the minimum,
185 if (abs(p_sorted.back() - p_sorted.front()) <= eps) {
186 // All predictions are (effectively) constant.
187 float total_weight = std::accumulate(w_sorted.begin(), w_sorted.end(), 0.0f);
188
189 // Return weighted positives / total weight, matching sklearn's result for constant scores
190 // (kinda weighted prevalence)
191 return total_weight == 0.0f ? 0.0f : ysum / total_weight;
192 }
193
194 // Find the indexes where prediction changes, so we can treat it as one block
195 vector<int> unique_indices = {}; // this one will be used to calculate the AUC
196 set<int> unique_probas = {}; // keep track of unique elements (this wont be used other than that)
197
198 for (int i=0; i<p_sorted.size(); ++i)
199 if (unique_probas.insert(p_sorted.at(i)).second)
200 unique_indices.push_back(i);
201
202 unique_indices.push_back(num_instances); // last index is the number of elements
203
204 float tp = 0.0f;
205 float fp = 0.0f;
206 vector<float> precision = {1.0};
207 vector<float> recall = {0.0};
208
209 for (size_t i = 0; i < unique_indices.size() - 1; ++i) {
210 int start = unique_indices[i];
211 int end = unique_indices[i+1];
212
213 // process group with a for loop (aggregating for each sample)
214 for (int j = start; j < end; ++j) {
215 tp += y_sorted.at(j) * w_sorted.at(j);
216 fp += (1.0f - y_sorted.at(j)) * w_sorted.at(j);
217
218 float relevant = tp + fp;
219 precision.push_back(relevant == 0.0f ? 0.0f : tp / relevant);
220 recall.push_back(ysum == 0.0f ? 1.0f : tp / ysum);
221 }
222 }
223
224 // integrate PR curve
225 float average_precision = 0.0f;
226 for (size_t i = 0; i < num_instances; ++i) {
227 average_precision += (recall[i+1] - recall[i]) * precision[i+1];
228 }
229
230 return average_precision;
231}
232
// multinomial log loss
/// @brief Per-sample multinomial (multi-class) log loss — NOT YET IMPLEMENTED.
/// Currently a stub: always returns a zero vector of length y.rows(), so any
/// caller sees a loss of 0 for every sample. The commented-out code below is
/// the draft implementation, kept for when softmax/multiclass support lands.
/// @param y ground-truth class labels
/// @param predict_proba per-class probability matrix (one column per class —
///        assumed from the draft's predict_proba.col(...) usage; TODO confirm)
/// @param class_weights per-class weights indexed by label; unused in the stub
/// @return zero vector of length y.rows() (placeholder)
VectorXf multi_log_loss(const VectorXf& y, const ArrayXXf& predict_proba,
                        const vector<float>& class_weights)
{
    // TODO: fix softmax and multiclassification, then implement this
    VectorXf loss = VectorXf::Zero(y.rows());

    // TODO: needs to be the index of unique elements
    // get class labels
    // vector<float> uc = unique( ArrayXi(y.cast<int>()) );

    // float eps = 1e-6f;
    // float sum_weights = 0;
    // for (unsigned i = 0; i < y.rows(); ++i)
    // {
    //     for (const auto& c : uc)
    //     {
    //         // for specific class
    //         ArrayXf yhat = predict_proba.col(int(c));

    //         /* float yi = y(i) == c ? 1.0 : 0.0 ; */

    //         if (y(i) == c)
    //         {
    //             if (yhat(i) < eps || 1 - yhat(i) < eps)
    //             {
    //                 // clip probabilities since log loss is undefined for yhat=0 or yhat=1
    //                 loss(i) += -log(eps);
    //             }
    //             else
    //             {
    //                 loss(i) += -log(yhat(i));
    //             }
    //         }
    //     }
    //     if (!class_weights.empty()){
    //         loss(i) = loss(i)*class_weights.at(y(i));
    //         sum_weights += class_weights.at(y(i));
    //     }
    // }
    // if (sum_weights > 0)
    //     loss = loss.array() / sum_weights * y.size();

    return loss;
}
284
285float mean_multi_log_loss(const VectorXf& y,
286 const ArrayXXf& predict_proba, VectorXf& loss,
287 const vector<float>& class_weights)
288{
289 loss = multi_log_loss(y, predict_proba, class_weights);
290
291 return loss.mean();
292}
293
/// @brief Balanced accuracy for multi-class classification — NOT YET IMPLEMENTED.
/// Currently a stub that always returns 0.0 and leaves `loss` untouched.
/// The commented-out code below is a draft that averages per-class
/// (TPR + TNR)/2 accuracies; it references an undeclared `yhat` and a
/// `unique` helper, so it does not compile as-is.
/// @param y ground-truth class labels
/// @param predict_proba per-class probability matrix (unused in the stub)
/// @param class_weights per-class weights (unused in the stub)
/// @return 0.0 (placeholder)
float multi_zero_one_loss(const VectorXf& y,
        const ArrayXXf& predict_proba, VectorXf& loss,
        const vector<float>& class_weights )
{
    // TODO: implement this
    // vector<float> uc = unique(y);
    // vector<int> c;
    // for (const auto& i : uc)
    //     c.push_back(int(i));

    // // sensitivity (TP) and specificity (TN)
    // vector<float> TP(c.size(),0.0), TN(c.size(), 0.0), P(c.size(),0.0), N(c.size(),0.0);
    // ArrayXf class_accuracies(c.size());

    // // get class counts

    // for (unsigned i=0; i< c.size(); ++i)
    // {
    //     P.at(i) = (y.array().cast<int>() == c.at(i)).count();  // total positives for this class
    //     N.at(i) = (y.array().cast<int>() != c.at(i)).count();  // total negatives for this class
    // }

    // for (unsigned i = 0; i < y.rows(); ++i)
    // {
    //     if (yhat(i) == y(i))                    // true positive
    //         ++TP.at(y(i) == -1 ? 0 : y(i));     // if-then ? accounts for -1 class encoding

    //     for (unsigned j = 0; j < c.size(); ++j)
    //         if ( y(i) !=c.at(j) && yhat(i) != c.at(j) )    // true negative
    //             ++TN.at(j);
    // }

    // // class-wise accuracy = 1/2 ( true positive rate + true negative rate)
    // for (unsigned i=0; i< c.size(); ++i){
    //     class_accuracies(i) = (TP.at(i)/P.at(i) + TN.at(i)/N.at(i))/2;
    // }

    // // set loss vectors if third argument supplied
    // loss = (yhat.cast<int>().array() != y.cast<int>().array()).cast<float>();

    // return 1.0 - class_accuracies.mean();

    return 0.0;
}
343
344} // metrics
345} // Brush
float multi_zero_one_loss(const VectorXf &y, const ArrayXXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Accuracy for multi-class classification.
Definition metrics.cpp:294
float zero_one_loss(const VectorXf &y, const VectorXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Accuracy for binary classification.
Definition metrics.cpp:73
float mean_log_loss(const VectorXf &y, const VectorXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
log loss
Definition metrics.cpp:45
float mean_multi_log_loss(const VectorXf &y, const ArrayXXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Calculates the mean multinomial log loss between the predicted probabilities and the true labels.
Definition metrics.cpp:285
float average_precision_score(const VectorXf &y, const VectorXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Calculates the average precision score between the predicted probabilities and the true labels.
Definition metrics.cpp:132
float mse(const VectorXf &y, const VectorXf &yhat, VectorXf &loss, const vector< float > &class_weights)
mean squared error
Definition metrics.cpp:9
VectorXf multi_log_loss(const VectorXf &y, const ArrayXXf &predict_proba, const vector< float > &class_weights)
Calculates the multinomial log loss between the predicted probabilities and the true labels.
Definition metrics.cpp:234
VectorXf log_loss(const VectorXf &y, const VectorXf &predict_proba, const vector< float > &class_weights)
Calculates the log loss between the predicted probabilities and the true labels.
Definition metrics.cpp:17
float bal_zero_one_loss(const VectorXf &y, const VectorXf &predict_proba, VectorXf &loss, const vector< float > &class_weights)
Balanced accuracy for binary classification.
Definition metrics.cpp:100
string to_string(const T &value)
template function to convert objects to string for logging
Definition utils.h:369
< nsga2 selection operator for getting the front
Definition bandit.cpp:4