8 #include <unordered_set>
14 string PBSTR =
"====================";
22 x = (
isnan(x)).select(0,x);
26 ArrayXf y = ArrayXf(x);
31 std::string
ltrim(std::string str,
const std::string& chars)
33 str.erase(0, str.find_first_not_of(chars));
37 std::string
rtrim(std::string str,
const std::string& chars)
39 str.erase(str.find_last_not_of(chars) + 1);
43 std::string
trim(std::string str,
const std::string& chars)
57 std::map<float, bool> uniqueMap;
58 for(
i = 0;
i < X.rows();
i++)
64 for(j = 0; j < X.cols(); j++)
66 if(X(
i, j) != 0 && X(
i, j) != 1)
68 if(X(
i,j) != floor(X(
i, j)) && X(
i,j) != ceil(X(
i,j)))
69 isCategorical =
false;
71 uniqueMap[X(
i, j)] =
true;
75 dtypes.push_back(
'b');
78 if(isCategorical && uniqueMap.size() < 10)
79 dtypes.push_back(
'c');
81 dtypes.push_back(
'f');
92 vector<float> x(v.size());
93 x.assign(v.data(),v.data()+v.size());
95 size_t n = x.size()/2;
97 nth_element(x.begin(),x.begin()+n,x.end());
99 if (x.size() % 2 == 0) {
100 nth_element(x.begin(),x.begin()+n-1,x.end());
101 return (x.at(n) + x.at(n-1)) / 2;
114 std::vector<float>::iterator middle = x.begin() + x.size()/2;
116 nth_element(x.begin(), middle, x.end());
118 std::vector<float>::iterator it = std::find(v.begin(), v.end(), *middle);
120 std::vector<float>::size_type pos = std::distance(v.begin(), it);
129 ArrayXf tmp = mean*ArrayXf::Ones(v.size());
130 return pow((v - tmp), 2).mean();
136 float mean = v.mean();
143 float mean = v.mean();
144 ArrayXf tmp = mean*ArrayXf::Ones(v.size());
146 float thirdMoment = pow((v - tmp), 3).mean();
147 float variance = pow((v - tmp), 2).mean();
149 return thirdMoment/sqrt(pow(
variance, 3));
155 float mean = v.mean();
156 ArrayXf tmp = mean*ArrayXf::Ones(v.size());
158 float fourthMoment = pow((v - tmp), 4).mean();
159 float variance = pow((v - tmp), 2).mean();
161 return fourthMoment/pow(
variance, 2);
166 float meanX = x.mean();
167 float meanY = y.mean();
170 ArrayXf tmp1 = meanX*ArrayXf::Ones(x.size());
171 ArrayXf tmp2 = meanY*ArrayXf::Ones(y.size());
173 return ((x - tmp1)*(y - tmp2)).mean();
177 float slope(
const ArrayXf& x,
const ArrayXf& y)
189 float mad(
const ArrayXf& x)
193 float x_median =
median(x);
195 ArrayXf dev(x.size());
196 for (
int i =0;
i < x.size(); ++
i)
197 dev(
i) = fabs(x(
i) - x_median);
209 _start = high_resolution_clock::now();
213 return high_resolution_clock::now() -
_start;
220 for (
unsigned i =0;
i < infs.size(); ++
i)
229 for (
unsigned i =0;
i < nans.size(); ++
i)
238 BDCSVD<MatrixXf> svd(X);
240 ArrayXf svals = svd.singularValues();
243 cond= svals(0) / svals(svals.size()-1);
256 MatrixXf centered = X.colwise() - X.rowwise().mean();
258 MatrixXf cov = ( centered * centered.adjoint()) /
float(X.cols() - 1);
259 VectorXf tmp = 1/cov.diagonal().array().sqrt();
260 auto d = tmp.asDiagonal();
268 MatrixXf tmp =
corrcoef(X).triangularView<StrictlyUpper>();
269 float N = tmp.rows()*(tmp.rows()-1)/2;
271 return tmp.array().square().sum()/N;
281 unsigned md_complexity,
282 unsigned md_num_params,
286 time.push_back(timer_count);
297 std::string
ravel(
const vector<string>& v,
string sep)
300 for (
int i = 0;
i < v.size(); ++
i)
303 if (
i < v.size() - 1)
std::chrono::duration< float > Elapsed() const
high_resolution_clock::time_point _start
Eigen::Array< bool, Eigen::Dynamic, 1 > ArrayXb
float condition_number(const MatrixXf &X)
returns the condition number of a matrix.
std::string ltrim(std::string str, const std::string &chars)
float skew(const ArrayXf &v)
calculate skew
float mad(const ArrayXf &x)
median absolute deviation
ArrayXb isinf(const ArrayXf &x)
returns true for elements of x that are infinite
float slope(const ArrayXf &x, const ArrayXf &y)
slope of x/y
ArrayXb isnan(const ArrayXf &x)
returns true for elements of x that are NaN
std::string ravel(const vector< string > &v, string sep)
takes a vector of strings and returns it as a single delimited string.
std::string trim(std::string str, const std::string &chars)
float mean_square_corrcoef(const MatrixXf &X)
float covariance(const ArrayXf &x, const ArrayXf &y)
covariance of x and y
int argmiddle(vector< float > &v)
returns the (first) index of the element in v that holds the middle-ranked value
MatrixXf corrcoef(const MatrixXf &X)
returns the Pearson correlation coefficients of a matrix.
float pearson_correlation(const ArrayXf &x, const ArrayXf &y)
the normalized covariance of x and y
vector< char > find_dtypes(const MatrixXf &X)
determines data types of columns of matrix X.
float kurtosis(const ArrayXf &v)
calculate kurtosis
float median(const ArrayXf &v)
calculate median
std::string rtrim(std::string str, const std::string &chars)
float variance(const ArrayXf &v, float mean)
calculate variance when mean provided
void clean(ArrayXf &x)
limits node output to be between MIN_FLT and MAX_FLT
vector< unsigned > med_size
vector< float > min_loss_v
vector< float > med_loss_v
vector< unsigned > med_num_params
vector< unsigned > med_dim
void update(int index, float timer_count, float bst_score, float bst_score_v, float md_score, float md_loss_v, unsigned md_size, unsigned md_complexity, unsigned md_num_params, unsigned md_dim)
vector< unsigned > med_complexity