Brush C++ API
A flexible interpretable machine learning framework
metrics.cpp
#include "metrics.h"

namespace Brush {
namespace Eval {

/* Scoring functions */

/// mean squared error
float mse(const VectorXf& y, const VectorXf& yhat, VectorXf& loss,
          const vector<float>& class_weights)
{
    loss = (yhat - y).array().pow(2);
    return loss.mean();
}
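// Illustrative usage (a documentation sketch, not part of the original source;
// assumes Eigen's VectorXf and this header are available):
//
//     VectorXf y(4), yhat(4), loss;
//     y    << 0, 1, 2, 3;
//     yhat << 0, 1, 2, 5;
//     float err = mse(y, yhat, loss, {}); // loss = per-sample squared errors,
//                                         // err  = loss.mean() = 1.0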

VectorXf log_loss(const VectorXf& y, const VectorXf& predict_proba,
                  const vector<float>& class_weights)
{
    float eps = pow(10,-10);

    VectorXf loss;

    float sum_weights = 0;
    loss.resize(y.rows());
    for (unsigned i = 0; i < y.rows(); ++i)
    {
        if (predict_proba(i) < eps || 1 - predict_proba(i) < eps)
            // clip probabilities since log loss is undefined for predict_proba=0 or predict_proba=1
            loss(i) = -(y(i)*log(eps) + (1-y(i))*log(1-eps));
        else
            loss(i) = -(y(i)*log(predict_proba(i)) + (1-y(i))*log(1-predict_proba(i)));

        if (loss(i) < 0)
            throw std::runtime_error("loss(i)= " + to_string(loss(i))
                    + ". y = " + to_string(y(i)) + ", predict_proba(i) = "
                    + to_string(predict_proba(i)));

        if (!class_weights.empty())
        {
            loss(i) = loss(i) * class_weights.at(y(i));
            sum_weights += class_weights.at(y(i));
        }
    }

    if (sum_weights > 0)
        loss = loss.array() / sum_weights * y.size(); // normalize weight contributions

    return loss;
}
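// Written out, the per-sample loss computed above is the binary cross-entropy
// (illustrative notation):
//
//     loss(i) = -( y(i)*log(p(i)) + (1 - y(i))*log(1 - p(i)) )
//
// where p(i) = predict_proba(i), with the probability clipped away from 0 and 1
// (eps = 1e-10) so the logarithm stays finite. When class_weights are supplied,
// each loss(i) is scaled by the weight of its true class and the vector is
// rescaled by y.size()/sum_weights afterwards.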

/// log loss
float mean_log_loss(const VectorXf& y,
        const VectorXf& predict_proba, VectorXf& loss,
        const vector<float>& class_weights)
{
    loss = log_loss(y, predict_proba, class_weights);
    return loss.mean();
}
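// Illustrative usage (a documentation sketch, not part of the original source):
//
//     VectorXf y(3), proba(3), loss;
//     y     << 0, 1, 1;
//     proba << 0.1, 0.9, 0.8;
//     float bce = mean_log_loss(y, proba, loss, {});
//     // loss holds the per-sample cross-entropy; bce is its mean (~0.145 here)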

float average_precision_score(const VectorXf& y, const VectorXf& predict_proba,
                              VectorXf& loss,
                              const vector<float>& class_weights) {

    // get argsort of predict_proba, in descending order
    vector<int> argsort(predict_proba.size());
    iota(argsort.begin(), argsort.end(), 0);
    sort(argsort.begin(), argsort.end(), [&](int i, int j) {
        return predict_proba[i] > predict_proba[j];
    });

    // (weighted) sum of positive labels, used as the recall denominator
    float ysum = 0;
    if (!class_weights.empty())
        for (int i = 0; i < y.size(); i++) {
            ysum += y(i) * class_weights.at(y(i));
        }
    else
        ysum = y.sum();

    // Calculate the precision and recall values
    VectorXf precision(predict_proba.size());
    VectorXf recall(predict_proba.size());

    float true_positives = 0;
    float false_positives = 0;
    float positives = 0;

    for (int i = 0; i < predict_proba.size(); i++) {
        if (predict_proba[argsort[i]] >= 0.5 && y[argsort[i]] == 1) {
            true_positives += 1;
        }
        else {
            if (!class_weights.empty())
                false_positives += class_weights[y(argsort[i])];
            else
                false_positives += 1;
        }
        positives = true_positives + false_positives;

        precision[i] = positives==0.0 ? 0.0 : true_positives/positives;
        recall[i] = ysum==0.0 ? 1.0 : true_positives/ysum;
    }

    // Calculate the average precision score
    float average_precision = 0;
    float last_recall = 0;

    loss = VectorXf::Zero(predict_proba.size());
    for (int i = 0; i < predict_proba.size(); i++) {
        if (recall[i] != last_recall) {
            loss[i] = precision[i] * (recall[i] - last_recall);
            average_precision += loss[i];
            last_recall = recall[i];
        }
    }

    return average_precision;
}
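// The value accumulated above is the usual step-wise approximation of the
// area under the precision-recall curve (illustrative notation):
//
//     AP = sum_n (R_n - R_{n-1}) * P_n
//
// where P_n and R_n are the precision and recall after the n-th sample, taken
// in order of decreasing predicted probability.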

// multinomial log loss
VectorXf multi_log_loss(const VectorXf& y, const ArrayXXf& predict_proba,
                        const vector<float>& class_weights)
{
    // TODO: fix softmax and multiclassification, then implement this
    VectorXf loss = VectorXf::Zero(y.rows());

    // TODO: needs to be the index of unique elements
    // get class labels
    // vector<float> uc = unique( ArrayXi(y.cast<int>()) );

    // float eps = pow(10,-10);
    // float sum_weights = 0;
    // for (unsigned i = 0; i < y.rows(); ++i)
    // {
    //     for (const auto& c : uc)
    //     {
    //         // for specific class
    //         ArrayXf yhat = predict_proba.col(int(c));
    //         /* std::cout << "class " << c << "\n"; */

    //         /* float yi = y(i) == c ? 1.0 : 0.0 ; */
    //         /* std::cout << "yi: " << yi << ", yhat(" << i << "): " << yhat(i) ; */
    //         if (y(i) == c)
    //         {
    //             if (yhat(i) < eps || 1 - yhat(i) < eps)
    //             {
    //                 // clip probabilities since log loss is undefined for yhat=0 or yhat=1
    //                 loss(i) += -log(eps);
    //             }
    //             else
    //             {
    //                 loss(i) += -log(yhat(i));
    //             }
    //             /* std::cout << ", loss(" << i << ") = " << loss(i); */
    //         }
    //         /* std::cout << "\n"; */
    //     }
    //     if (!class_weights.empty()){
    //         /* std::cout << "weights.at(y(" << i << ")): " << class_weights.at(y(i)) << "\n"; */
    //         loss(i) = loss(i)*class_weights.at(y(i));
    //         sum_weights += class_weights.at(y(i));
    //     }
    // }
    // if (sum_weights > 0)
    //     loss = loss.array() / sum_weights * y.size();

    /* cout << "loss.mean(): " << loss.mean() << "\n"; */
    /* cout << "loss.sum(): " << loss.sum() << "\n"; */
    return loss;
}
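// For reference, the quantity the commented-out block is meant to compute is
// the multinomial cross-entropy (illustrative notation; not yet implemented):
//
//     loss(i) = -log( p(i, y(i)) )
//
// i.e. the negative log of the probability predicted for sample i's true
// class, with the same eps clipping and optional class weighting as the
// binary log_loss above.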

float mean_multi_log_loss(const VectorXf& y,
        const ArrayXXf& predict_proba, VectorXf& loss,
        const vector<float>& class_weights)
{
    loss = multi_log_loss(y, predict_proba, class_weights);

    /* std::cout << "loss: " << loss.transpose() << "\n"; */
    /* std::cout << "mean loss: " << loss.mean() << "\n"; */
    return loss.mean();
}

} // namespace Eval
} // namespace Brush