18 #include <shogun/lib/common.h>
21 using namespace Eigen;
36 void clean(ArrayXf& x);
37 void clean(VectorXf& x);
39 std::string
ltrim(std::string str,
const std::string& chars =
"\t\n\v\f\r ");
41 std::string
rtrim(std::string str,
const std::string& chars =
"\t\n\v\f\r ");
43 std::string
trim(std::string str,
const std::string& chars =
"\t\n\v\f\r ");
47 bool in(
const vector<T> v,
const T&
i)
49 return std::find(v.begin(), v.end(),
i) != v.end();
53 float median(
const ArrayXf& v);
56 float variance(
const ArrayXf& v,
float mean);
62 float skew(
const ArrayXf& v);
68 float covariance(
const ArrayXf& x,
const ArrayXf& y);
71 float slope(
const ArrayXf& x,
const ArrayXf& y);
77 float mad(
const ArrayXf& x);
81 vector<size_t>
argsort(
const vector<T> &v,
bool ascending=
true)
84 vector<size_t> idx(v.size());
85 std::iota(idx.begin(), idx.end(), 0);
90 sort(idx.begin(), idx.end(),
91 [&v](
size_t i1,
size_t i2) {return v[i1] < v[i2];});
95 sort(idx.begin(), idx.end(),
96 [&v](
size_t i1,
size_t i2) {return v[i1] > v[i2];});
110 explicit Timer(
bool run =
false);
114 std::chrono::duration<float> Elapsed()
const;
116 template <
typename T,
typename Traits>
118 std::basic_ostream<T, Traits>& out,
const Timer& timer)
120 return out << timer.
Elapsed().count();
124 high_resolution_clock::time_point
_start;
129 template <
typename T>
136 for(x = 0; x < w.size(); ++x)
139 for(x = 0; x < w.size(); ++x)
140 w_new.push_back(exp(w[x])/sum);
150 , remove_offset(rm_offset)
160 template <
typename T>
161 void fit(
const MatrixBase<T>& X,
const vector<char>& dt)
166 for (
unsigned int i=0;
i<X.rows(); ++
i)
173 offset.push_back(
float(X.row(
i).mean()));
176 offset.push_back(0.0);
181 (X.row(
i).array() - offset.at(
i))
183 .sum()/(X.row(
i).size()-1)
190 template <
typename T>
194 for (
unsigned int i=0;
i<X.rows(); ++
i)
202 if (this->scale_all || dtypes.at(
i)==
'f')
205 X.row(
i) = X.row(
i).array() - offset.at(
i);
207 X.row(
i) = X.row(
i).array()/scale.at(
i);
214 template <
typename T>
219 for (
unsigned int i=0;
i<B.rows(); ++
i)
226 if (this->scale_all || dtypes.at(
i)==
'f')
229 B.row(
i) = B.row(
i).array()/scale.at(
i);
237 auto tmp_map = Map<Eigen::Matrix<T,Dynamic,1>>(B.data(), B.size());
238 this->adjust_weights(tmp_map);
244 auto tmp_map = Map<Eigen::Matrix<T,Dynamic,1>>(B.data(), B.size());
245 this->adjust_weights(tmp_map);
248 template <
typename T>
255 float adjustment = 0;
257 for (
unsigned int i=0;
i<Bn.size(); ++
i)
265 if (this->scale_all || dtypes.at(
i)==
'f')
268 adjustment += b*offset.at(
i)/scale.at(
i);
271 return init_offset - adjustment;
273 template <
typename T>
276 auto w = Map<const Eigen::Matrix<T,Dynamic,1>>(Bn.data(), Bn.size());
277 return this->adjust_offset(w, init_offset);
280 template <
typename T>
283 auto w = Map<const Eigen::Matrix<T,Dynamic,1>>(Bn.data(), Bn.size());
284 return this->adjust_offset(w, init_offset);
288 template <
typename T>
291 cout <<
"inverting X = " << X << endl;
293 for (
unsigned int i=0;
i<X.rows(); ++
i)
301 if (this->scale_all || dtypes.at(
i)==
'f')
303 cout <<
"X.row(i) = X.row(i).array()*scale.at(i) : \n\t";
304 cout <<
" = " << X.row(
i).array() <<
"*" << scale.at(
i) << endl;
306 X.row(
i) = X.row(
i).array()*scale.at(
i);
307 cout <<
"X.row(i) = X.row(i).array() + offset.at(i) : \n\t";
308 cout <<
" = " << X.row(
i).array() <<
" + " << offset.at(
i) << endl;
309 X.row(
i) = X.row(
i).array() + offset.at(
i);
314 template <
typename T>
316 const vector<char>& dtypes)
318 this->fit(X, dtypes);
335 template <
typename T>
338 std::sort(w.begin(),w.end());
339 typename vector<T>::iterator it;
341 w.resize(std::distance(w.begin(), it));
346 template <
typename T>
349 vector<T> wv( w.data(), w.data()+w.size());
354 template <
typename T>
357 vector<T> wv( w.data(), w.data()+w.size());
362 template <
typename T>
365 vector<T> wv( w.data(), w.data()+w.rows()*w.cols());
373 MatrixXf
corrcoef(
const MatrixXf& X);
394 void update(
int index,
401 unsigned md_complexity,
402 unsigned md_num_params,
421 template <
typename T>
424 std::stringstream ss;
429 template <
typename T>
432 std::ostringstream out;
434 out << std::fixed << a_value;
439 std::string
ravel(
const vector<string>& v,
string sep=
",");
friend std::basic_ostream< T, Traits > & operator<<(std::basic_ostream< T, Traits > &out, const Timer &timer)
std::chrono::duration< float > Elapsed() const
high_resolution_clock::time_point _start
std::chrono::high_resolution_clock high_resolution_clock
std::chrono::seconds seconds
Eigen::Array< bool, Eigen::Dynamic, 1 > ArrayXb
float condition_number(const MatrixXf &X)
returns the condition number of a matrix.
std::string ltrim(std::string str, const std::string &chars)
float skew(const ArrayXf &v)
calculate skew
float mad(const ArrayXf &x)
median absolute deviation
ArrayXb isinf(const ArrayXf &x)
returns true for elements of x that are infinite
float slope(const ArrayXf &x, const ArrayXf &y)
slope of x/y
ArrayXb isnan(const ArrayXf &x)
returns true for elements of x that are NaN
vector< T > softmax(const vector< T > &w)
return the softmax transformation of a vector.
std::string ravel(const vector< string > &v, string sep)
takes a vector of strings and returns them joined into a single delimited string.
std::string trim(std::string str, const std::string &chars)
float mean_square_corrcoef(const MatrixXf &X)
float covariance(const ArrayXf &x, const ArrayXf &y)
covariance of x and y
int argmiddle(vector< float > &v)
returns the (first) index of the element whose value is the middlemost in v
MatrixXf corrcoef(const MatrixXf &X)
returns the pearson correlation coefficients of matrix.
float pearson_correlation(const ArrayXf &x, const ArrayXf &y)
the normalized covariance of x and y
vector< char > find_dtypes(const MatrixXf &X)
determines data types of columns of matrix X.
vector< T > unique(Array< T, -1, 1 > w)
returns unique elements in 1d Eigen array
float kurtosis(const ArrayXf &v)
calculate kurtosis
float median(const ArrayXf &v)
calculate median
std::string to_string(const T a_value, const int n)
vector< size_t > argsort(const vector< T > &v, bool ascending=true)
return indices that sort a vector
bool in(const vector< T > v, const T &i)
check if element is in vector.
std::string rtrim(std::string str, const std::string &chars)
float variance(const ArrayXf &v, float mean)
calculate variance when mean provided
void clean(ArrayXf &x)
limits node output to be between MIN_FLT and MAX_FLT
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Log_Stats, generation, time, min_loss, min_loss_v, med_loss, med_loss_v, med_size, med_complexity, med_num_params, med_dim)
vector< unsigned > med_size
vector< float > min_loss_v
vector< float > med_loss_v
vector< unsigned > med_num_params
vector< unsigned > med_dim
vector< unsigned > med_complexity
normalizes a matrix to zero mean and unit variance.
void fit_normalize(MatrixBase< T > &X, const vector< char > &dtypes)
fit then normalize
void adjust_weights(shogun::SGVector< T > &B) const
void adjust_weights(vector< T > &B) const
float adjust_offset(const shogun::SGVector< T > &Bn, float init_offset) const
float adjust_offset(const MatrixBase< T > &Bn, float init_offset) const
Normalizer(bool sa=true, bool rm_offset=true)
float adjust_offset(const vector< T > &Bn, float init_offset) const
void fit(const MatrixBase< T > &X, const vector< char > &dt)
fit the scale and offset of data.
void adjust_weights(MatrixBase< T > &B) const
return weights of a linear model, y = B*X, given weights of the model fit on normalized X (each weight is divided by the scale of its feature).
void invert(MatrixBase< T > &X) const
inverse normalize a matrix.
void normalize(MatrixBase< T > &X) const
normalize matrix.