8#include <unordered_set> 
   13string PBSTR = 
"====================";
 
   20    x = (isinf(x)).select(
MAX_FLT,x);
 
   21    x = (isnan(x)).select(0,x);
 
 
   24std::string 
ltrim(std::string str, 
const std::string& chars)
 
   26    str.erase(0, str.find_first_not_of(chars));
 
 
   30std::string 
rtrim(std::string str, 
const std::string& chars)
 
   32    str.erase(str.find_last_not_of(chars) + 1);
 
 
   36std::string 
trim(std::string str, 
const std::string& chars)
 
 
   43    vector<type_index> dtypes;
 
   49    std::map<float, bool> uniqueMap;
 
   50    for(i = 0; i < X.cols(); i++)
 
   56        for(j = 0; j < X.cols(); j++)
 
   58            if(X(i, j) != 0 && X(i, j) != 1)
 
   60            if(X(i,j) != floor(X(i, j)) && X(i,j) != ceil(X(i,j)))
 
   61                isCategorical = 
false;
 
   63                uniqueMap[X(i, j)] = 
true;
 
   67            dtypes.push_back(
typeid(
ArrayXb));
 
   70            if(isCategorical && uniqueMap.size() < 10)
 
   71                dtypes.push_back(
typeid( 
ArrayXi ));    
 
   73                dtypes.push_back(
typeid(ArrayXf));
 
 
   92    _start = high_resolution_clock::now();
 
 
   96    return high_resolution_clock::now() - 
_start;
 
 
  105    for (
unsigned int i=0; i<X.cols(); ++i)
 
  108        VectorXf tmp = X.col(i).array()-X.col(i).mean();
 
  110        scale.push_back(std::sqrt((tmp.array()).square().sum()/(tmp.size())));
 
  111        offset.push_back(X.col(i).mean());
 
 
  120    for (
unsigned int i=0; i<X.cols(); ++i)
 
  122        if (std::isinf(
scale.at(i)))
 
  124            X.col(i) = VectorXf::Zero(X.col(i).size());
 
  130            X.col(i) = X.col(i).array() - 
offset.at(i);
 
  132                X.col(i) = X.col(i).array()/
scale.at(i);
 
 
  165string to_string(const T& value)
 
  167    std::stringstream ss;
 
  179    BDCSVD<MatrixXf> svd(X);
 
  183    ArrayXf svals = svd.singularValues();
 
  187        cond= svals(0) / svals(svals.size()-1);
 
 
  202    MatrixXf centered = X.colwise() - X.rowwise().mean();
 
  206    MatrixXf cov = ( centered * centered.adjoint()) / float(X.cols() - 1);
 
  208    VectorXf tmp = 1/cov.diagonal().array().sqrt();
 
  209    auto d = tmp.asDiagonal();
 
 
  221    MatrixXf tmp = 
corrcoef(X).triangularView<StrictlyUpper>();
 
  222    float N = tmp.rows()*(tmp.rows()-1)/2;
 
  224    return tmp.array().square().sum()/N;
 
 
  234                       unsigned md_complexity,
 
  236                       unsigned mx_complexity
 
  240    time.push_back(timer_count);
 
 
  282        { 
typeid(int) , 
"int" },
 
  283        { 
typeid(float) , 
"float" },
 
  284        { 
typeid(bool) , 
"bool" },
 
  285        { 
typeid(ArrayXf) , 
"ArrayXf" },
 
  286        { 
typeid(
ArrayXi) , 
"ArrayXi" },
 
  287        { 
typeid(
ArrayXb) , 
"ArrayXb" }
 
 
  305    std::vector<float>::iterator middle = x.begin() + x.size()/2;
 
  307    nth_element(x.begin(), middle, x.end());
 
  309    std::vector<float>::iterator it = std::find(v.begin(), v.end(), *middle);
 
  311    std::vector<float>::size_type pos = std::distance(v.begin(), it);
 
 
  319    return pow((v - v.mean()), 2).mean();
 
 
  325    float mean = v.mean();
 
  326    ArrayXf tmp = mean*ArrayXf::Ones(v.size());
 
  328    float thirdMoment = pow((v - tmp), 3).mean();
 
  329    float variance = pow((v - tmp), 2).mean();
 
  331    return thirdMoment/sqrt(pow(
variance, 3));
 
 
  337    float mean = v.mean();
 
  338    ArrayXf tmp = mean*ArrayXf::Ones(v.size());
 
  340    float fourthMoment = pow((v - tmp), 4).mean();
 
  341    float variance = pow((v - tmp), 2).mean();
 
  343    return fourthMoment/pow(
variance, 2);
 
 
  348    float meanX = x.mean();
 
  349    float meanY = y.mean();
 
  352    ArrayXf tmp1 = meanX*ArrayXf::Ones(x.size());
 
  353    ArrayXf tmp2 = meanY*ArrayXf::Ones(y.size());
 
  355    return ((x - tmp1)*(y - tmp2)).mean();
 
 
  359float slope(
const ArrayXf& x, 
const ArrayXf& y)
 
 
  373float mad(
const ArrayXf& x) 
 
  377    float x_median = 
median(x);
 
  379    ArrayXf dev(x.size());
 
  380    for (
int i =0; i < x.size(); ++i)
 
  381        dev(i) = fabs(x(i) - x_median);
 
 
  388                          const std::string& replace)
 
  391    while ((pos = subject.find(search, pos)) != std::string::npos) {
 
  392         subject.replace(pos, search.length(), replace);
 
  393         pos += replace.length();
 
 
  399                          const std::string& replace)
 
  402    while ((pos = subject.find(search, pos)) != std::string::npos) {
 
  403         subject.replace(pos, search.length(), replace);
 
  404         pos += replace.length();
 
 
  411    auto tmp = mask.cast<
int>();
 
  413    for (
int i = 0; i < mask.size(); ++i)
 
 
  423    tuple<vector<size_t>,vector<size_t>> indices({},{});
 
  424    for (
int i = 0; i < mask.size(); ++i)
 
  427            std::get<0>(indices).push_back(i);
 
  429            std::get<1>(indices).push_back(i);
 
 
std::chrono::duration< float > Elapsed() const
 
high_resolution_clock::time_point _start
 
namespace containing various utility functions
 
float mean_square_corrcoef(const MatrixXf &X)
 
std::string ReplaceString(std::string subject, const std::string &search, const std::string &replace)
returns a copy of subject with every occurrence of search replaced by replace
 
MatrixXf corrcoef(const MatrixXf &X)
returns the pearson correlation coefficients of matrix.
 
vector< type_index > get_dtypes(MatrixXf &X)
calculates data types for each column of X
 
float slope(const ArrayXf &x, const ArrayXf &y)
slope of x/y
 
float mad(const ArrayXf &x)
median absolute deviation
 
float condition_number(const MatrixXf &X)
returns the condition number of X: the ratio of its largest to its smallest singular value, computed via SVD
 
std::string ltrim(std::string str, const std::string &chars)
 
float skew(const ArrayXf &v)
calculate skew
 
float pearson_correlation(const ArrayXf &x, const ArrayXf &y)
the normalized covariance of x and y
 
tuple< vector< size_t >, vector< size_t > > mask_to_indices(const ArrayXb &mask)
returns two index vectors: the first holds the positions where mask is true, the second the positions where mask is false.
 
Scalar median(const T &v)
calculate median
 
void clean(ArrayXf &x)
limits node output to be between MIN_FLT and MAX_FLT; infinite values are replaced with MAX_FLT and NaN values with 0
 
float kurtosis(const ArrayXf &v)
calculate kurtosis
 
TypeMap< std::string > type_names
 
std::string rtrim(std::string str, const std::string &chars)
 
void ReplaceStringInPlace(std::string &subject, const std::string &search, const std::string &replace)
replaces every occurrence of search in subject with replace, modifying subject in place
 
std::string trim(std::string str, const std::string &chars)
 
float variance(const ArrayXf &v)
calculate variance
 
std::map< std::type_index, T > TypeMap
 
float covariance(const ArrayXf &x, const ArrayXf &y)
covariance of x and y
 
int argmiddle(vector< float > &v)
returns the index of the first element in v whose value equals the median element of v (the value at sorted position size/2)
 
vector< size_t > mask_to_index(const ArrayXb &mask)
convert a boolean mask to an index array
 
nsga2 selection operator for getting the front
 
Eigen::Array< bool, Eigen::Dynamic, 1 > ArrayXb
 
Eigen::Array< int, Eigen::Dynamic, 1 > ArrayXi
 
vector< unsigned > max_size
 
void update(int index, float timer_count, float bst_score, float bst_score_v, float md_score, float md_score_v, unsigned md_size, unsigned md_complexity, unsigned mx_size, unsigned mx_complexity)
 
vector< unsigned > max_complexity
 
vector< float > med_score_v
 
vector< unsigned > med_size
 
vector< float > med_score
 
vector< float > best_score_v
 
vector< unsigned > med_complexity
 
vector< float > best_score
 
void fit(MatrixXf &X, const vector< char > &dt)
fit the scale and offset of data.
 
void fit_normalize(MatrixXf &X, const vector< char > &dtypes)
 
void normalize(MatrixXf &X)
normalize matrix.