Dataset#
-
class Dataset#
Holds variable-type data, i.e. a dataset whose features may have different data types.
Public Functions
-
void init()#
Call init() at the end of each constructor to define the metafeatures of the data.
-
map<string, State> make_features(const ArrayXXf &X, const map<string, State> &Z = {}, const vector<string> &vn = {})#
turns input data into a feature map
-
map<string, State> copy_and_make_features(const ArrayXXf &X, const Dataset &ref_dataset, const vector<string> &vn = {})#
turns input into a feature map, with feature types copied from a reference
-
inline Dataset(std::map<string, State> &d, const Ref<const ArrayXf> &y_ = ArrayXf(), bool c = false, float validation_size = 0.0, float batch_size = 1.0)#
initialize data from a map.
-
inline Dataset(const ArrayXXf &X, const Ref<const ArrayXf> &y_ = ArrayXf(), const vector<string> &vn = {}, const map<string, State> &Z = {}, bool c = false, float validation_size = 0.0, float batch_size = 1.0)#
initialize data from a matrix with feature columns.
-
inline Dataset(const ArrayXXf &X, const vector<string> &vn, bool c = false, float validation_size = 0.0, float batch_size = 1.0)#
initialize data from X and feature names
-
inline Dataset(const ArrayXXf &X, const Dataset &ref_dataset, const vector<string> &vn, bool c = false)#
-
inline void print() const#
-
inline auto get_X() const#
-
inline int get_n_samples() const#
-
inline int get_n_features() const#
-
float get_batch_size()#
-
void set_batch_size(float new_size)#
-
inline State operator[](std::string name) const#
Public Members
-
std::vector<DataType> unique_data_types#
keeps track of the unique data types in the dataset.
-
std::vector<DataType> feature_types#
types of data in the features.
-
std::unordered_map<DataType, vector<string>> features_of_type#
map from data types to features having that type.
-
std::map<string, State> features#
dataset features, as key value pairs
-
ArrayXf y#
length N array, the target label
-
bool classification#
whether this is a classification problem
-
std::optional<std::reference_wrapper<const ArrayXXf>> Xref#
-
float validation_size#
fraction of the original data set aside for validation. If 0.0, all data is used for both training and validation.
-
bool use_validation#
-
float batch_size#
fraction of the training data to use in each batch. If 1.0, all of the training data is used.
-
bool use_batch#
-
void init()#