8#include <unordered_set>
13string PBSTR =
"====================";
20 x = (isinf(x)).select(
MAX_FLT,x);
21 x = (isnan(x)).select(0,x);
24std::string
ltrim(std::string str,
const std::string& chars)
26 str.erase(0, str.find_first_not_of(chars));
30std::string
rtrim(std::string str,
const std::string& chars)
32 str.erase(str.find_last_not_of(chars) + 1);
36std::string
trim(std::string str,
const std::string& chars)
43 vector<type_index> dtypes;
49 std::map<float, bool> uniqueMap;
50 for(i = 0; i < X.cols(); i++)
56 for(j = 0; j < X.cols(); j++)
58 if(X(i, j) != 0 && X(i, j) != 1)
60 if(X(i,j) != floor(X(i, j)) && X(i,j) != ceil(X(i,j)))
61 isCategorical =
false;
63 uniqueMap[X(i, j)] =
true;
67 dtypes.push_back(
typeid(
ArrayXb));
70 if(isCategorical && uniqueMap.size() < 10)
71 dtypes.push_back(
typeid(
ArrayXi ));
73 dtypes.push_back(
typeid(ArrayXf));
92 _start = high_resolution_clock::now();
96 return high_resolution_clock::now() -
_start;
105 for (
unsigned int i=0; i<X.cols(); ++i)
108 VectorXf tmp = X.col(i).array()-X.col(i).mean();
110 scale.push_back(std::sqrt((tmp.array()).square().sum()/(tmp.size())));
111 offset.push_back(X.col(i).mean());
120 for (
unsigned int i=0; i<X.cols(); ++i)
122 if (std::isinf(
scale.at(i)))
124 X.col(i) = VectorXf::Zero(X.col(i).size());
130 X.col(i) = X.col(i).array() -
offset.at(i);
132 X.col(i) = X.col(i).array()/
scale.at(i);
165string to_string(const T& value)
167 std::stringstream ss;
179 BDCSVD<MatrixXf> svd(X);
183 ArrayXf svals = svd.singularValues();
187 cond= svals(0) / svals(svals.size()-1);
202 MatrixXf centered = X.colwise() - X.rowwise().mean();
206 MatrixXf cov = ( centered * centered.adjoint()) / float(X.cols() - 1);
208 VectorXf tmp = 1/cov.diagonal().array().sqrt();
209 auto d = tmp.asDiagonal();
221 MatrixXf tmp =
corrcoef(X).triangularView<StrictlyUpper>();
222 float N = tmp.rows()*(tmp.rows()-1)/2;
224 return tmp.array().square().sum()/N;
234 unsigned md_complexity,
236 unsigned mx_complexity
240 time.push_back(timer_count);
282 {
typeid(int) ,
"int" },
283 {
typeid(float) ,
"float" },
284 {
typeid(bool) ,
"bool" },
285 {
typeid(ArrayXf) ,
"ArrayXf" },
286 {
typeid(
ArrayXi) ,
"ArrayXi" },
287 {
typeid(
ArrayXb) ,
"ArrayXb" }
305 std::vector<float>::iterator middle = x.begin() + x.size()/2;
307 nth_element(x.begin(), middle, x.end());
309 std::vector<float>::iterator it = std::find(v.begin(), v.end(), *middle);
311 std::vector<float>::size_type pos = std::distance(v.begin(), it);
319 return pow((v - v.mean()), 2).mean();
325 float mean = v.mean();
326 ArrayXf tmp = mean*ArrayXf::Ones(v.size());
328 float thirdMoment = pow((v - tmp), 3).mean();
329 float variance = pow((v - tmp), 2).mean();
331 return thirdMoment/sqrt(pow(
variance, 3));
337 float mean = v.mean();
338 ArrayXf tmp = mean*ArrayXf::Ones(v.size());
340 float fourthMoment = pow((v - tmp), 4).mean();
341 float variance = pow((v - tmp), 2).mean();
343 return fourthMoment/pow(
variance, 2);
348 float meanX = x.mean();
349 float meanY = y.mean();
352 ArrayXf tmp1 = meanX*ArrayXf::Ones(x.size());
353 ArrayXf tmp2 = meanY*ArrayXf::Ones(y.size());
355 return ((x - tmp1)*(y - tmp2)).mean();
359float slope(
const ArrayXf& x,
const ArrayXf& y)
373float mad(
const ArrayXf& x)
377 float x_median =
median(x);
379 ArrayXf dev(x.size());
380 for (
int i =0; i < x.size(); ++i)
381 dev(i) = fabs(x(i) - x_median);
388 const std::string& replace)
391 while ((pos = subject.find(search, pos)) != std::string::npos) {
392 subject.replace(pos, search.length(), replace);
393 pos += replace.length();
399 const std::string& replace)
402 while ((pos = subject.find(search, pos)) != std::string::npos) {
403 subject.replace(pos, search.length(), replace);
404 pos += replace.length();
411 auto tmp = mask.cast<
int>();
413 for (
int i = 0; i < mask.size(); ++i)
423 tuple<vector<size_t>,vector<size_t>> indices({},{});
424 for (
int i = 0; i < mask.size(); ++i)
427 std::get<0>(indices).push_back(i);
429 std::get<1>(indices).push_back(i);
std::chrono::duration< float > Elapsed() const
high_resolution_clock::time_point _start
namespace containing various utility functions
float mean_square_corrcoef(const MatrixXf &X)
std::string ReplaceString(std::string subject, const std::string &search, const std::string &replace)
find and replace string
MatrixXf corrcoef(const MatrixXf &X)
returns the pearson correlation coefficients of matrix.
vector< type_index > get_dtypes(MatrixXf &X)
calculates data types for each column of X
float slope(const ArrayXf &x, const ArrayXf &y)
slope of x/y
float mad(const ArrayXf &x)
median absolute deviation
float condition_number(const MatrixXf &X)
returns the condition number of X (ratio of its largest to its smallest singular value)
std::string ltrim(std::string str, const std::string &chars)
float skew(const ArrayXf &v)
calculate skew
float pearson_correlation(const ArrayXf &x, const ArrayXf &y)
the normalized covariance of x and y
tuple< vector< size_t >, vector< size_t > > mask_to_indices(const ArrayXb &mask)
returns two index vectors: the first holds the positions where mask is true, the second the positions where mask is false.
Scalar median(const T &v)
calculate median
void clean(ArrayXf &x)
limits node output to be between MIN_FLT and MAX_FLT; NaN values are replaced with 0
float kurtosis(const ArrayXf &v)
calculate kurtosis
TypeMap< std::string > type_names
std::string rtrim(std::string str, const std::string &chars)
void ReplaceStringInPlace(std::string &subject, const std::string &search, const std::string &replace)
string find and replace in place
std::string trim(std::string str, const std::string &chars)
float variance(const ArrayXf &v)
calculate variance
std::map< std::type_index, T > TypeMap
float covariance(const ArrayXf &x, const ArrayXf &y)
covariance of x and y
int argmiddle(vector< float > &v)
returns the (first) index of the element of v holding the middle value, i.e. the value at position size/2 in sorted order
vector< size_t > mask_to_index(const ArrayXb &mask)
convert a boolean mask to an index array
nsga2 selection operator for getting the front
Eigen::Array< bool, Eigen::Dynamic, 1 > ArrayXb
Eigen::Array< int, Eigen::Dynamic, 1 > ArrayXi
vector< unsigned > max_size
void update(int index, float timer_count, float bst_score, float bst_score_v, float md_score, float md_score_v, unsigned md_size, unsigned md_complexity, unsigned mx_size, unsigned mx_complexity)
vector< unsigned > max_complexity
vector< float > med_score_v
vector< unsigned > med_size
vector< float > med_score
vector< float > best_score_v
vector< unsigned > med_complexity
vector< float > best_score
void fit(MatrixXf &X, const vector< char > &dt)
fit the scale and offset of data.
void fit_normalize(MatrixXf &X, const vector< char > &dtypes)
void normalize(MatrixXf &X)
normalize matrix.