Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
bind_dataset.cpp
Go to the documentation of this file.
1#include "module.h"
2#include "../data/data.h"
3#include "../types.h"
4#include "../data/io.h"
5namespace py = pybind11;
6namespace br = Brush;
7namespace nl = nlohmann;
8
9void bind_dataset(py::module & m)
10{
11 py::class_<br::Data::Dataset>(m, "Dataset")
12 // construct from X, feature names (and optional validation and batch sizes) with constructor 3.
13 .def(py::init([](const Ref<const ArrayXXf>& X,
14 const vector<string>& feature_names=vector<string>(),
15 const vector<string>& feature_types=vector<string>(),
16 const bool c=false,
17 const float validation_size=0.0,
18 const float batch_size=1.0,
19 const bool shuffle_split=false){
20 return br::Data::Dataset(
21 X, feature_names, feature_types, c,
22 validation_size, batch_size, shuffle_split);
23 }),
24 py::arg("X"),
25 py::arg("feature_names") = vector<string>(),
26 py::arg("feature_types") = vector<string>(),
27 py::arg("c") = false,
28 py::arg("validation_size") = 0.0,
29 py::arg("batch_size") = 1.0,
30 py::arg("shuffle_split") = false
31 )
32 // construct from X, y, feature names (and optional validation and batch sizes) with constructor 2.
33 .def(py::init([](const Ref<const ArrayXXf>& X,
34 const Ref<const ArrayXf>& y,
35 const vector<string>& feature_names=vector<string>(),
36 const vector<string>& feature_types=vector<string>(),
37 const bool c=false,
38 const float validation_size=0.0,
39 const float batch_size=1.0,
40 const bool shuffle_split=false){
41 return br::Data::Dataset(
42 X, y, feature_names, {}, feature_types,
43 c, validation_size, batch_size, shuffle_split);
44 }),
45 py::arg("X"),
46 py::arg("y"),
47 py::arg("feature_names") = vector<string>(),
48 py::arg("feature_types") = vector<string>(),
49 py::arg("c") = false,
50 py::arg("validation_size") = 0.0,
51 py::arg("batch_size") = 1.0,
52 py::arg("shuffle_split") = false
53 )
54 // construct from X, feature names, but copying the feature types from a
55 // reference dataset with constructor 4. Useful for predicting (specially
56 // because the user can provide a single element matrix, or an array with
57 // no feature names).
58 .def(py::init([](const Ref<const ArrayXXf>& X,
59 const br::Data::Dataset& ref_dataset,
60 const vector<string>& feature_names){
61 return br::Data::Dataset(X, ref_dataset, feature_names);
62 }),
63 py::arg("X"),
64 py::arg("ref_dataset"),
65 py::arg("feature_names")
66 )
67
68 .def("get_feature_types", &br::Data::Dataset::get_feature_types)
69 .def("get_feature_names", [](const br::Data::Dataset &d) {return d.feature_names; }) // wrapping it into a function to keep consistent with get_feature_types. brush feature types are not native to python, so that's why we need that function to cast it to something python can understand.
70
71 .def_readwrite("y", &br::Data::Dataset::y) // TODO: should this be read only?
72
73 // .def_readwrite("features", &br::Data::Dataset::features)
74 .def("get_n_samples", &br::Data::Dataset::get_n_samples)
75 .def("get_n_features", &br::Data::Dataset::get_n_features)
76 .def("print", &br::Data::Dataset::print)
77 .def("get_batch", &br::Data::Dataset::get_batch)
78 .def("get_training_data", &br::Data::Dataset::get_training_data)
79 .def("get_validation_data", &br::Data::Dataset::get_validation_data)
80 .def("get_batch_size", &br::Data::Dataset::get_batch_size)
81 .def("set_batch_size", &br::Data::Dataset::set_batch_size)
82 .def("split", &br::Data::Dataset::split)
83 .def("get_X", &br::Data::Dataset::get_X)
84 ;
85
86 m.def("read_csv", &br::Data::read_csv, py::arg("path"), py::arg("target"), py::arg("sep")=',');
87}
void bind_dataset(py::module &m)
holds variable type data.
Definition data.h:51
Dataset get_validation_data() const
Definition data.cpp:215
int get_n_samples() const
Definition data.h:225
auto get_X() const
Definition data.h:212
Dataset get_batch() const
select random subset of data for training weights.
Definition data.cpp:187
vector< string > get_feature_types() const
Definition data.cpp:217
void print() const
Definition data.h:196
int get_n_features() const
Definition data.h:231
std::vector< string > feature_names
names of the feature types as string representations.
Definition data.h:70
ArrayXf y
length N array, the target label
Definition data.h:82
float get_batch_size()
Definition data.cpp:384
std::array< Dataset, 2 > split(const ArrayXb &mask) const
Definition data.cpp:203
Dataset get_training_data() const
Definition data.cpp:214
void set_batch_size(float new_size)
Definition data.cpp:385
Dataset read_csv(const std::string &path, const std::string &target, char sep)
read csv file into Data.
Definition io.cpp:14
nsga2 selection operator for getting the front
Definition bandit.cpp:4