Dataset#

class Dataset#

Holds variable-type data.

Public Functions

Dataset operator()(const vector<size_t> &idx) const#

return a slice of the data using indices idx

void init()#

call init at the end of constructors to define metafeatures of the data.

map<string, State> make_features(const ArrayXXf &X, const map<string, State> &Z = {}, const vector<string> &vn = {})#

turns input data into a feature map

map<string, State> copy_and_make_features(const ArrayXXf &X, const Dataset &ref_dataset, const vector<string> &vn = {})#

turns input into a feature map, with feature types copied from a reference

inline Dataset(std::map<string, State> &d, const Ref<const ArrayXf> &y_ = ArrayXf(), bool c = false, float validation_size = 0.0, float batch_size = 1.0)#

  1. initialize data from a map.

inline Dataset(const ArrayXXf &X, const Ref<const ArrayXf> &y_ = ArrayXf(), const vector<string> &vn = {}, const map<string, State> &Z = {}, bool c = false, float validation_size = 0.0, float batch_size = 1.0)#

  2. initialize data from a matrix with feature columns.

inline Dataset(const ArrayXXf &X, const vector<string> &vn, bool c = false, float validation_size = 0.0, float batch_size = 1.0)#

  3. initialize data from X and feature names.

inline Dataset(const ArrayXXf &X, const Dataset &ref_dataset, const vector<string> &vn, bool c = false)#
inline void print() const#
inline auto get_X() const#
Dataset get_training_data() const#
Dataset get_validation_data() const#
inline int get_n_samples() const#
inline int get_n_features() const#
Dataset get_batch() const#

Selects a random subset of the data for training weights.

float get_batch_size()#
void set_batch_size(float new_size)#
std::array<Dataset, 2> split(const ArrayXb &mask) const#
inline State operator[](std::string name) const#

Public Members

std::vector<DataType> unique_data_types#

keeps track of the unique data types in the dataset.

std::vector<DataType> feature_types#

types of data in the features.

std::unordered_map<DataType, vector<string>> features_of_type#

map from data types to features having that type.

std::map<string, State> features#

dataset features, as key value pairs

ArrayXf y#

length N array, the target label

bool classification#

whether this is a classification problem

std::optional<std::reference_wrapper<const ArrayXXf>> Xref#
float validation_size#

Fraction of the original data held out for validation. If 0.0, all of the data is used for both training and validation.

bool use_validation#
float batch_size#

Fraction of the training data to use in each batch. If 1.0, all of the training data is used.

bool use_batch#