40template<
typename StateRef>
84 std::optional<std::reference_wrapper<const ArrayXXf>>
Xref;
103 const map<string, State>& Z = {},
104 const vector<string>& vn = {},
105 const vector<string>& ft = {}
111 const vector<string>& vn = {}
116 const Ref<const ArrayXf>& y_ = ArrayXf(),
134 const Ref<const ArrayXf>& y_ = ArrayXf(),
135 const vector<string>& vn = {},
136 const map<string, State>& Z = {},
137 const vector<string>& ft = {},
153 Xref = optional<reference_wrapper<const ArrayXXf>>{X};
157 Dataset(
const ArrayXXf& X,
const vector<string>& vn,
158 const vector<string>& ft = {},
173 Xref = optional<reference_wrapper<const ArrayXXf>>{X};
180 const vector<string>& vn
191 Xref = optional<reference_wrapper<const ArrayXXf>>{X};
196 fmt::print(
"Dataset contains {} samples and {} features\n",
199 for (
auto& [key, value] : this->features)
201 if (std::holds_alternative<ArrayXf>(value))
202 fmt::print(
"{} <ArrayXf>: {}\n", key, std::get<ArrayXf>(value));
203 else if (std::holds_alternative<ArrayXi>(value))
204 fmt::print(
"{} <ArrayXi>: {}\n", key, std::get<ArrayXi>(value));
205 else if (std::holds_alternative<ArrayXb>(value))
206 fmt::print(
"{} <ArrayXb>: {}\n", key, std::get<ArrayXb>(value));
212 if (!
Xref.has_value())
214 return this->Xref.value().get();
224 [&](
auto&& arg) ->
int {
return int(arg.size());},
235 std::array<Dataset, 2>
split(
const ArrayXb& mask)
const;
239 if (this->features.find(name) ==
features.end())
241 return this->features.at(name);
265extern const map<DataType,std::type_index>
DataTypeID;
266extern map<std::type_index,DataType>
DataIDType;
272 template <
typename FormatContext>
holds variable type data.
Dataset(const ArrayXXf &X, const vector< string > &vn, const vector< string > &ft={}, bool c=false, float validation_size=0.0, float batch_size=1.0, bool shuffle_split=false)
bool classification
whether this is a classification problem
Dataset get_validation_data() const
std::map< string, State > features
dataset features, as key value pairs
int get_n_samples() const
Dataset(std::map< string, State > &d, const Ref< const ArrayXf > &y_=ArrayXf(), bool c=false, float validation_size=0.0, float batch_size=1.0, bool shuffle_split=false)
vector< size_t > training_data_idx
Dataset get_batch() const
selects a random subset of the data for training weights.
std::vector< DataType > feature_types
types of data in the features.
std::unordered_map< DataType, vector< string > > features_of_type
map from data types to features having that type.
float batch_size
fraction of the training data to use in each batch; if 1.0, all data is used.
int get_n_features() const
Dataset(const ArrayXXf &X, const Dataset &ref_dataset, const vector< string > &vn)
State operator[](std::string name) const
std::optional< std::reference_wrapper< const ArrayXXf > > Xref
std::vector< string > feature_names
names of the feature types as string representations.
std::vector< DataType > unique_data_types
keeps track of the unique data types in the dataset.
map< string, State > copy_and_make_features(const ArrayXXf &X, const Dataset &ref_dataset, const vector< string > &vn={})
turns input into a feature map, with feature types copied from a reference
map< string, State > make_features(const ArrayXXf &X, const map< string, State > &Z={}, const vector< string > &vn={}, const vector< string > &ft={})
turns input data into a feature map
void init()
call init at the end of constructors to define metafeatures of the data.
float validation_size
percentage of the original data used for training; if 0.0, all data is used for both training and validation.
vector< size_t > validation_data_idx
Dataset(const ArrayXXf &X, const Ref< const ArrayXf > &y_=ArrayXf(), const vector< string > &vn={}, const map< string, State > &Z={}, const vector< string > &ft={}, bool c=false, float validation_size=0.0, float batch_size=1.0, bool shuffle_split=false)
ArrayXf y
length N array, the target label
std::array< Dataset, 2 > split(const ArrayXb &mask) const
Dataset get_training_data() const
void set_batch_size(float new_size)
Dataset operator()(const vector< size_t > &idx) const
return a slice of the data using indices idx
holds variable type data.
#define HANDLE_ERROR_THROW(err)
namespace containing data structures used in Brush
State cast_type(const ArrayXf &x, const StateRef &x_ref)
DataType StateType(const State &arg)
State check_type(const ArrayXf &x, const string t)
determines data types of columns of matrix X.
std::variant< ArrayXb, ArrayXi, ArrayXf, ArrayXXb, ArrayXXi, ArrayXXf, TimeSeriesb, TimeSeriesi, TimeSeriesf, ArrayXbJet, ArrayXiJet, ArrayXfJet, ArrayXXbJet, ArrayXXiJet, ArrayXXfJet, TimeSeriesbJet, TimeSeriesiJet, TimeSeriesfJet > State
defines the possible types of data flowing through nodes.
nsga2 selection operator for getting the front
ostream & operator<<(ostream &os, DataType n)
Eigen::Array< bool, Eigen::Dynamic, 1 > ArrayXb
map< DataType, string > DataTypeName
const map< DataType, std::type_index > DataTypeID
map< std::type_index, DataType > DataIDType
map< string, DataType > DataNameType
std::variant< ArrayXb, ArrayXi, ArrayXf, ArrayXXb, ArrayXXi, ArrayXXf, TimeSeriesb, TimeSeriesi, TimeSeriesf, ArrayXbJet, ArrayXiJet, ArrayXfJet, ArrayXXbJet, ArrayXXiJet, ArrayXXfJet, TimeSeriesbJet, TimeSeriesiJet, TimeSeriesfJet > State
defines the possible types of data flowing through nodes.