40template<
typename StateRef>
80 std::optional<std::reference_wrapper<const ArrayXXf>>
Xref;
97 const map<string, State>&
Z = {},
98 const vector<string>&
vn = {}
106 const vector<string>&
vn = {}
128 const vector<string>&
vn = {},
129 const map<string, State>&
Z = {},
167 const vector<string>&
vn,
183 fmt::print(
"Dataset contains {} samples and {} features\n",
188 if (std::holds_alternative<ArrayXf>(value))
189 fmt::print(
"{} <ArrayXf>: {}\n",
key, std::get<ArrayXf>(value));
190 else if (std::holds_alternative<ArrayXi>(value))
191 fmt::print(
"{} <ArrayXi>: {}\n",
key, std::get<ArrayXi>(value));
192 else if (std::holds_alternative<ArrayXb>(value))
193 fmt::print(
"{} <ArrayXb>: {}\n",
key, std::get<ArrayXb>(value));
199 if (!
Xref.has_value())
201 return this->Xref.value().get();
211 [&](
auto&&
arg) ->
int {
return int(
arg.size());},
226 if (this->features.find(name) ==
features.end())
228 return this->features.at(name);
240extern const map<DataType,std::type_index>
DataTypeID;
241extern map<std::type_index,DataType>
DataIDType;
246template <>
struct fmt::formatter<
Brush::DataType>: formatter<string_view> {
247 template <
typename FormatContext>
void bind_engine(py::module &m, string name)
holds variable type data.
bool classification
whether this is a classification problem
Dataset get_validation_data() const
std::map< string, State > features
dataset features, as key value pairs
int get_n_samples() const
vector< size_t > training_data_idx
Dataset get_batch() const
select random subset of data for training weights.
std::vector< DataType > feature_types
types of data in the features.
std::unordered_map< DataType, vector< string > > features_of_type
map from data types to features having that type.
float batch_size
fraction of the training data to use in each batch; if 1.0, then all data is used
int get_n_features() const
State operator[](std::string name) const
std::optional< std::reference_wrapper< const ArrayXXf > > Xref
std::vector< DataType > unique_data_types
keeps track of the unique data types in the dataset.
map< string, State > copy_and_make_features(const ArrayXXf &X, const Dataset &ref_dataset, const vector< string > &vn={})
turns input into a feature map, with feature types copied from a reference
Dataset(std::map< string, State > &d, const Ref< const ArrayXf > &y_=ArrayXf(), bool c=false, float validation_size=0.0, float batch_size=1.0)
void init()
call init at the end of constructors to define metafeatures of the data.
float validation_size
fraction of the original data used for training; if 0.0, then all data is used for both training and validation
Dataset(const ArrayXXf &X, const vector< string > &vn, bool c=false, float validation_size=0.0, float batch_size=1.0)
map< string, State > make_features(const ArrayXXf &X, const map< string, State > &Z={}, const vector< string > &vn={})
turns input data into a feature map
vector< size_t > validation_data_idx
Dataset(const ArrayXXf &X, const Dataset &ref_dataset, const vector< string > &vn, bool c=false)
Dataset(const ArrayXXf &X, const Ref< const ArrayXf > &y_=ArrayXf(), const vector< string > &vn={}, const map< string, State > &Z={}, bool c=false, float validation_size=0.0, float batch_size=1.0)
ArrayXf y
length N array, the target label
std::array< Dataset, 2 > split(const ArrayXb &mask) const
Dataset get_training_data() const
void set_batch_size(float new_size)
Dataset operator()(const vector< size_t > &idx) const
return a slice of the data using indices idx
#define HANDLE_ERROR_THROW(err)
State check_type(const ArrayXf &x)
determines data types of columns of matrix X.
State cast_type(const ArrayXf &x, const StateRef &x_ref)
DataType StateType(const State &arg)
std::variant< ArrayXb, ArrayXi, ArrayXf, ArrayXXb, ArrayXXi, ArrayXXf, TimeSeriesb, TimeSeriesi, TimeSeriesf, ArrayXbJet, ArrayXiJet, ArrayXfJet, ArrayXXbJet, ArrayXXiJet, ArrayXXfJet, TimeSeriesbJet, TimeSeriesiJet, TimeSeriesfJet > State
defines the possible types of data flowing through nodes.
nsga2 selection operator for getting the front
ostream & operator<<(ostream &os, DataType n)
Eigen::Array< bool, Eigen::Dynamic, 1 > ArrayXb
map< DataType, string > DataTypeName
const map< DataType, std::type_index > DataTypeID
map< std::type_index, DataType > DataIDType
map< string, DataType > DataNameType