Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
io.cpp
Go to the documentation of this file.
1/* FEAT
2copyright 2017 William La Cava
3license: GNU/GPL v3
4*/
5
6#include "io.h"
7#include "../util/utils.h"
8/* #include "rnd.h" */
9#include <unordered_set>
10
11namespace Brush::Data{
12
15 const std::string& path,
16 const std::string& target,
17 char sep
18)
19{
20 std::ifstream indata;
21 indata.open(path);
22 if (!indata.good())
23 HANDLE_ERROR_THROW("Invalid input file " + path + "\n");
24
25 string line;
26 map<string,vector<float>> values;
27 vector<float> targets;
28
29 std::vector<string> names;
30 unsigned rows=0, target_col_num = 0;
31
32 while (std::getline(indata, line))
33 {
34 std::stringstream lineStream(line);
35 std::string cell;
36
37 unsigned col_num=0;
38 while (std::getline(lineStream, cell, sep))
39 {
41
42 if (rows==0) // read in header
43 {
44 if (!cell.compare(target))
46 else
47 names.push_back(cell);
48 }
49 else if (col_num != target_col_num)
50 {
51 auto col_name = names.at(col_num);
52 if (!values.contains(col_name))
53 values[col_name] = {};
54
55 values.at(names.at(col_num)).push_back(std::stod(cell));
56 }
57 else
58 targets.push_back(std::stod(cell));
59
60 ++col_num;
61 }
62 ++rows;
63 }
64
65 auto y = ArrayXf::Map(targets.data(), targets.size());
66 // for (int i = 0; i < targets.size(); ++i)
67 // y(i) = targets.at(i);
68
69 // infer types of features
70 map<string, State> features;
71 for (auto& [key, value] : values)
72 {
73 auto tmp = Map<ArrayXf>(value.data(), value.size());
74
75 if (tmp.size() != y.size())
76 HANDLE_ERROR_THROW("different numbers of samples in X and y");
77 features[key] = check_type(tmp);
78
79 }
80
81 // check if endpoint is binary
82 bool binary_endpoint = (y.array() == 0 || y.array() == 1).all();
83
84 // using constructor 1. (initializing data from a map)
85 auto result = Dataset(features, y, binary_endpoint);
86
87 return result;
88}
89
90} // Brush
91
void bind_engine(py::module &m, string name)
holds variable type data.
Definition data.h:51
#define HANDLE_ERROR_THROW(err)
Definition error.h:27
namespace containing Data structures used in Brush
Definition data.cpp:49
State check_type(const ArrayXf &x)
determines data types of columns of matrix X.
Definition data.cpp:68
Dataset read_csv(const std::string &path, const std::string &target, char sep)
read csv file into Data.
Definition io.cpp:14
std::string trim(std::string str, const std::string &chars)
Definition utils.cpp:36