74 for(
int i = 0;
i <
x.size();
i++)
77 if(
x(
i) != 0 &&
x(
i) != 1)
79 if(
x(
i) != floor(
x(
i)) &&
x(
i) != ceil(
x(
i)))
105template<
typename StateRef>
108 if (std::holds_alternative<ArrayXi>(
x_ref))
110 else if (std::holds_alternative<ArrayXb>(
x_ref))
122 std::visit([&](
auto&&
arg)
124 using T = std::decay_t<
decltype(
arg)>;
125 if constexpr ( T::NumDimensions == 1)
127 else if constexpr (T::NumDimensions==2)
136 if (this->
y.size()>0)
138 new_y = this->
y(idx);
170 return std::array<Dataset, 2>{ (*this)(
idx1), (*
this)(
idx2) };
187 fmt::format(
"Error during the initialization of the dataset. It "
188 "does not contain any data\n")
193 for (
const auto& [name, value]: this->
features)
239 const map<string,State>&
Z,
240 const vector<string>&
vn
251 for (
int i = 0;
i <
X.cols(); ++
i)
259 if (
vn.size() !=
X.cols())
261 fmt::format(
"Variable names and data size mismatch: "
262 "{} variable names and {} features in X",
270 for (
int i = 0;
i <
X.cols(); ++
i)
285 const vector<string>&
vn
291 for (
int i = 0;
i <
X.cols(); ++
i)
299 if (
vn.size() !=
X.cols())
301 fmt::format(
"Variable names and data size mismatch: "
302 "{} variable names and {} features in X",
312 fmt::format(
"Reference dataset with incompatible number of variables: "
313 "Reference has {} variable names, but X has {}",
320 for (
int i = 0;
i <
X.cols(); ++
i)
void bind_engine(py::module &m, string name)
holds variable type data.
bool classification
whether this is a classification problem
Dataset get_validation_data() const
std::map< string, State > features
dataset features, as key value pairs
int get_n_samples() const
vector< size_t > training_data_idx
Dataset get_batch() const
select random subset of data for training weights.
std::vector< DataType > feature_types
types of data in the features.
std::unordered_map< DataType, vector< string > > features_of_type
map from data types to features having that type.
float batch_size
percentage of training data size to use in each batch. if 1.0, then all data is used
std::vector< DataType > unique_data_types
keeps track of the unique data types in the dataset.
map< string, State > copy_and_make_features(const ArrayXXf &X, const Dataset &ref_dataset, const vector< string > &vn={})
turns input into a feature map, with feature types copied from a reference
Dataset(std::map< string, State > &d, const Ref< const ArrayXf > &y_=ArrayXf(), bool c=false, float validation_size=0.0, float batch_size=1.0)
void init()
call init at the end of constructors to define metafeatures of the data.
float validation_size
percentage of original data used for train. if 0.0, then all data is used for train and validation
map< string, State > make_features(const ArrayXXf &X, const map< string, State > &Z={}, const vector< string > &vn={})
turns input data into a feature map
vector< size_t > validation_data_idx
ArrayXf y
length N array, the target label
std::array< Dataset, 2 > split(const ArrayXb &mask) const
Dataset get_training_data() const
void set_batch_size(float new_size)
Dataset operator()(const vector< size_t > &idx) const
return a slice of the data using indices idx
vector< size_t > shuffled_index(size_t n)
returns a shuffled index vector of length n
#define HANDLE_ERROR_THROW(err)
std::vector< DataType > StateTypes
State check_type(const ArrayXf &x)
determines data types of columns of matrix X.
TimeSeries< bool > TimeSeriesb
TimeSeries convenience typedefs.
State cast_type(const ArrayXf &x, const StateRef &x_ref)
TimeSeries< float > TimeSeriesf
DataType StateType(const State &arg)
ostream & operator<<(ostream &os, DataType dt)
std::variant< ArrayXb, ArrayXi, ArrayXf, ArrayXXb, ArrayXXi, ArrayXXf, TimeSeriesb, TimeSeriesi, TimeSeriesf, ArrayXbJet, ArrayXiJet, ArrayXfJet, ArrayXXbJet, ArrayXXiJet, ArrayXXfJet, TimeSeriesbJet, TimeSeriesiJet, TimeSeriesfJet > State
defines the possible types of data flowing thru nodes.
TimeSeries< int > TimeSeriesi
namespace containing various utility functions
static map< V, K > reverse_map(const map< K, V > &m)
Given a map from keys to values, creates a new map from values to keys.
string to_string(const T &value)
template function to convert objects to string for logging
void unique_insert(Vector &v, const T &t)
unique insertion into a vector. allows a vector to be used like a set. source: http://www....
vector< size_t > mask_to_index(const ArrayXb &mask)
convert a boolean mask to an index array
< nsga2 selection operator for getting the front
Eigen::Array< bool, Eigen::Dynamic, 1 > ArrayXb
Eigen::Array< int, Eigen::Dynamic, Eigen::Dynamic > ArrayXXi
Eigen::Array< bool, Eigen::Dynamic, Eigen::Dynamic > ArrayXXb
map< DataType, string > DataTypeName
const map< DataType, std::type_index > DataTypeID
Eigen::Array< int, Eigen::Dynamic, 1 > ArrayXi
map< std::type_index, DataType > DataIDType
map< string, DataType > DataNameType