9 #include <unordered_set>
17 int val = (int) (percentage * 100);
18 int lpad = (int) (percentage *
PBWIDTH);
20 printf (
"\rCompleted %3d%% [%.*s%*s]", val, lpad,
PBSTR.c_str(), rpad,
"");
27 void load_csv (
const std::string & path, MatrixXf& X, VectorXf& y,
28 vector<string>& names, vector<char> &dtypes,
bool& binary_endpoint,
37 std::vector<float> values, targets;
38 unsigned rows=0, col=0, target_col = 0;
40 while (std::getline(indata, line))
42 std::stringstream lineStream(line);
45 while (std::getline(lineStream, cell, sep))
51 if (!cell.compare(
"class") || !cell.compare(
"target")
52 || !cell.compare(
"label"))
55 names.push_back(cell);
57 else if (col != target_col)
58 values.push_back(std::stod(cell));
60 targets.push_back(std::stod(cell));
68 X = Map<MatrixXf>(values.data(), values.size()/(rows-1), rows-1);
69 y = Map<VectorXf>(targets.data(), targets.size());
71 if (X.cols() != y.size())
73 if (X.rows() != names.size())
75 string error_msg =
"header missing or incorrect number of "
77 error_msg +=
"X size: " +
to_string(X.rows()) +
"x"
79 error_msg +=
"feature names: ";
81 error_msg += fn +
",";
87 string print_dtypes =
"dtypes: ";
88 for (
unsigned i = 0;
i < dtypes.size(); ++
i)
89 print_dtypes += (names.at(
i) +
" (" +
to_string(dtypes.at(
i))
95 binary_endpoint = (y.array() == 0 || y.array() == 1).all();
101 std::map<
string, std::pair<vector<ArrayXf>, vector<ArrayXf> > > &Z,
104 std::map<string, std::map<int, std::pair<vector<float>, vector<float> > > > dataMap;
105 std::ifstream indata;
110 std::string line, firstKey =
"";
113 std::getline(indata, header);
115 std::stringstream lineStream(header);
117 std::map<string,int> head_to_col;
118 for (
int i = 0;
i<4; ++
i)
121 std::getline(lineStream,tmp, sep);
122 head_to_col[tmp] =
i;
125 while (std::getline(indata, line))
127 std::stringstream lineStream(line);
128 std::string sampleNo, value, time, type;
130 vector<string> cols(4);
131 std::getline(lineStream, cols.at(0), sep);
132 std::getline(lineStream, cols.at(1), sep);
133 std::getline(lineStream, cols.at(2), sep);
134 std::getline(lineStream, cols.at(3), sep);
136 sampleNo = cols.at(head_to_col.at(
"id"));
137 time = cols.at(head_to_col.at(
"date"));
138 value = cols.at(head_to_col.at(
"value"));
139 type = cols.at(head_to_col.at(
"name"));
143 if(!firstKey.compare(
""))
147 dataMap[type][std::stoi(sampleNo)].first.push_back(std::stod(value));
148 dataMap[type][std::stoi(sampleNo)].second.push_back(std::stod(time));
151 int numVars = dataMap.size();
152 int numSamples = dataMap.at(firstKey).size();
155 for (
const auto &val: dataMap )
157 for(x = 0; x < numSamples; ++x)
159 ArrayXf arr1 = Map<ArrayXf>(dataMap.at(val.first).at(x).first.data(),
160 dataMap.at(val.first).at(x).first.size());
161 ArrayXf arr2 = Map<ArrayXf>(dataMap.at(val.first).at(x).second.data(),
162 dataMap.at(val.first).at(x).second.size());
163 Z[val.first].first.push_back(arr1);
164 Z[val.first].second.push_back(arr2);
176 std::map<
string, std::pair<vector<ArrayXf>, vector<ArrayXf> > > &Z,
177 char sep,
const vector<int>& idx)
187 std::unordered_set<int> idSet;
189 std::map<int, vector<int>> idLoc;
190 std::map<int, int> locID;
192 for(
const auto&
id : idx)
194 auto tmp = idSet.insert(
id);
195 if (!tmp.second || *tmp.first !=
id)
197 if(idSet.find(
id) == idSet.end())
199 cout <<
"failed to find " <<
id <<
" in idSet\n";
200 cout <<
"retrying..\n";
202 while (blrg<100 && (!tmp.second || *tmp.first !=
id) )
204 auto tmp = idSet.insert(
id);
213 idLoc[id].push_back(
i);
228 std::map<string, std::map<int, std::pair<vector<float>, vector<float> > > > dataMap;
229 std::ifstream indata;
234 std::string line, firstKey =
"";
238 std::getline(indata, header);
240 std::stringstream lineStream(header);
242 std::map<string,int> head_to_col;
243 for (
int i = 0;
i<4; ++
i)
246 std::getline(lineStream,tmp, sep);
248 head_to_col[tmp] =
i;
253 cout <<
"reading " << path <<
"...\n";
254 while (std::getline(indata, line))
256 std::stringstream lineStream(line);
257 std::string sampleNo, value, time,
name;
259 vector<string> cols(4);
260 std::getline(lineStream, cols.at(0), sep);
261 std::getline(lineStream, cols.at(1), sep);
262 std::getline(lineStream, cols.at(2), sep);
263 std::getline(lineStream, cols.at(3), sep);
265 cols.at(3) =
trim(cols.at(3));
267 sampleNo = cols.at(head_to_col.at(
"id"));
268 time = cols.at(head_to_col.at(
"date"));
269 value = cols.at(head_to_col.at(
"value"));
270 name = cols.at(head_to_col.at(
"name"));
272 if(!firstKey.compare(
""))
275 int sID = std::stol(sampleNo);
277 if(idSet.find(sID) != idSet.end())
281 for (
const auto&
loc : idLoc.at(sID))
283 dataMap[
name][
loc].first.push_back(std::stod(value));
284 dataMap[
name][
loc].second.push_back(std::stod(time));
299 for (
const auto &val: dataMap )
302 int numSamples = val.second.size();
303 for (
int x = 0; x<numSamples; ++x)
305 if (val.second.find(x) == val.second.end())
308 +
" not found (patient id = "
314 int numVars = dataMap.size();
317 for (
const auto &val: dataMap )
320 int numSamples = val.second.size();
324 for(
int x = 0; x < numSamples; ++x)
327 ArrayXf arr1 = Map<ArrayXf>(dataMap.at(val.first).at(x).first.data(),
328 dataMap.at(val.first).at(x).first.size());
329 ArrayXf arr2 = Map<ArrayXf>(dataMap.at(val.first).at(x).second.data(),
330 dataMap.at(val.first).at(x).second.size());
331 Z[val.first].first.push_back(arr1);
332 Z[val.first].second.push_back(arr2);
#define THROW_LENGTH_ERROR(err)
#define THROW_RUNTIME_ERROR(err)
#define THROW_INVALID_ARGUMENT(err)
std::string trim(std::string str, const std::string &chars)
void load_longitudinal(const std::string &path, std::map< string, std::pair< vector< ArrayXf >, vector< ArrayXf > > > &Z, char sep)
load a longitudinal CSV file into the map Z.
void load_partial_longitudinal(const std::string &path, std::map< string, std::pair< vector< ArrayXf >, vector< ArrayXf > > > &Z, char sep, const vector< int > &idx)
load part of a longitudinal CSV file into the map Z, keeping only the sample ids listed in the idx vector
vector< char > find_dtypes(const MatrixXf &X)
determines data types of columns of matrix X.
void printProgress(float percentage)
outputs a progress bar, filled according to percentage (a fraction; it is multiplied by 100 for the displayed value).
std::string to_string(const T &value)
template function to convert objects to string for logging
void load_csv(const std::string &path, MatrixXf &X, VectorXf &y, vector< string > &names, vector< char > &dtypes, bool &binary_endpoint, char sep)
load a CSV file into the matrix X and the target vector y.