Feat C++ API
A feature engineering automation tool
ml.h
Go to the documentation of this file.
1 /* FEAT
2 copyright 2017 William La Cava
3 license: GNU/GPL v3
4 */
5 #ifndef ML_H
6 #define ML_H
7 
8 //external includes
9 #pragma GCC diagnostic push
10 #pragma GCC diagnostic ignored "-Wdeprecated"
11 #include <shogun/base/some.h>
12 #include <shogun/base/init.h>
13 #include <shogun/machine/Machine.h>
14 #include <shogun/lib/common.h>
15 #include <shogun/labels/RegressionLabels.h>
16 #include <shogun/labels/MulticlassLabels.h>
17 #include <shogun/features/Features.h>
18 #include <shogun/machine/LinearMachine.h>
19 #include <shogun/regression/LeastAngleRegression.h>
20 #include <shogun/regression/LinearRidgeRegression.h>
21 //#include <shogun/machine/RandomForest.h>
22 #include <shogun/regression/svr/LibLinearRegression.h>
23 /* #include <shogun/classifier/svm/LibLinear.h> */
24 #include <shogun/ensemble/MeanRule.h>
25 #include <shogun/ensemble/MajorityVote.h>
26 #include <shogun/machine/LinearMulticlassMachine.h>
27 #pragma GCC diagnostic pop
28 #include <cmath>
29 // internal includes
30 #include "shogun/MyCARTree.h"
33 #include "shogun/MyLibLinear.h"
34 #include "shogun/MyRandomForest.h"
35 #include "../params.h"
36 #include "../eval/scorer.h"
37 #include "../util/utils.h"
38 #include "nlohmann/json.hpp"
39 #include "../util/serialization.h"
40 
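41 // names pulled into scope for use in this header (note: these are declared at global scope, so they are also visible to every file that includes ml.h)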
42 using nlohmann::json;
43 using std::string;
44 using std::dynamic_pointer_cast;
45 using std::shared_ptr;
46 using std::make_shared;
47 using std::cout;
48 namespace sh = shogun;
49 using sh::EProblemType;
50 using sh::EProbHeuristicType;
51 using sh::CBinaryLabels;
52 using sh::CMulticlassLabels;
53 using sh::CLabels;
54 
55 namespace FT{
56 
57 using namespace Util;
58 
63 namespace Model{
64 
65 enum ML_TYPE { // identifiers for the ML methods; ML::ml_hash maps ml string names to these values
66  LARS, // Least Angle Regression
67  Ridge, // Ridge Regression
68  RF, // Random Forest
69  SVM, // Support Vector Machines
70  CART, // Classification and Regression Trees
71  LR, // L2-penalized Logistic Regression
72  L1_LR // L1-penalized Logistic Regression
73  };
74 extern map<ML_TYPE, float> C_DEFAULT; // one float per ML_TYPE, defined in ml.cc; presumably the default value of the regularization parameter C for each method -- TODO confirm in ml.cc
79 class ML // class that specifies the machine learning algorithm to pair with Feat
80 {
81  public:
82 
83  /* ML(const Parameters& params, bool norm=true); */
84  ML(string ml="LinearRidgeRegression", bool norm=true, // ml: name of the ML method (string); norm: presumably stored in `normalize` -- TODO confirm in ml.cc
85  bool classification = false, int n_classes = 2); // defaults describe a non-classification setup with n_classes = 2
86 
87  void init(bool assign_p_est=true); // NOTE(review): assign_p_est presumably controls whether p_est is (re)assigned -- confirm in ml.cc
88 
89  ~ML();
90 
91  // map ml string names to enum values.
92  std::map<string, ML_TYPE> ml_hash;
93  // return vector of weights for model.
94  vector<float> get_weights(bool norm_adjust=true) const; // NOTE(review): norm_adjust presumably relates to input normalization -- confirm in ml.cc
95 
96  // train ml model on X and return label object.
97  shared_ptr<CLabels> fit(const MatrixXf& X, const VectorXf& y,
98  const Parameters& params, bool& pass, // pass is a non-const reference; presumably reports whether training succeeded -- TODO confirm
99  const vector<char>& dtypes=vector<char>()); // dtypes defaults to an empty vector
100 
101  // train ml model on X and return estimation y.
102  VectorXf fit_vector(const MatrixXf& X, const VectorXf& y,
103  const Parameters& params, bool& pass,
104  const vector<char>& dtypes=vector<char>());
105 
106  // predict using a trained ML model, returning a label object.
107  shared_ptr<CLabels> predict(const MatrixXf& X,
108  bool print=false);
109 
110  // predict using a trained ML model, returning a vector of predictions.
111  VectorXf predict_vector(const MatrixXf& X);
112 
113  // predict using a trained ML model, returning an ArrayXXf (presumably predicted probabilities, per the name -- TODO confirm layout in ml.cc).
114  ArrayXXf predict_proba(const MatrixXf& X);
115 
117  VectorXf labels_to_vector(const shared_ptr<CLabels>& labels); // converts a label object into a VectorXf
118 
120  shared_ptr<CLabels> retrieve_labels( // returns a label object for the given features
121  CDenseFeatures<float64_t>* features,
122  bool proba, // NOTE(review): presumably selects probability output -- confirm in ml.cc
123  bool& pass);
124 
125  /* VectorXd predict(MatrixXd& X); */
126  // set data types (for tree-based methods)
127  void set_dtypes(const vector<char>& dtypes);
129  float get_bias(bool norm_adjust=true) const; // returns a float bias value; see ml.cc for the effect of norm_adjust
130  void set_bias(float b);
132  shared_ptr<CLabels> fit_tune(MatrixXf& X, VectorXf& y, // NOTE(review): presumably fits with parameter tuning (per the name) -- confirm in ml.cc; X and y are taken by non-const reference
133  const Parameters& params, bool& pass,
134  const vector<char>& dtypes=vector<char>(),
135  bool set_default=false);
136 
137  shared_ptr<sh::CMachine> p_est; // pointer to the ML object
139  string ml_str; // user specified ML type (string)
140  sh::EProblemType prob_type; // type of learning problem; binary, multiclass or regression
 // NOTE(review): the member index for this file also lists ml_type (ml.h:138), N (ml.h:142) and
 // max_train_time (ml.h:143); those declarations are not visible in this listing.
144  bool normalize; // control whether ML normalizes its input before training
146  float C; // regularization parameter
147 
148  private:
149  vector<char> dtypes; // data types; presumably the values passed to set_dtypes (for tree-based methods) -- TODO confirm
150 };
151 // JSON serialization: to_json/from_json overloads for ML and for shared_ptr<ML> (json is nlohmann::json); presumably found by nlohmann::json's conversion lookup -- TODO confirm usage
152 void to_json(json& j, const shared_ptr<ML>& ml);
153 void from_json(const json& j, shared_ptr<ML>& ml);
154 void to_json(json& j, const ML& ml);
155 void from_json(const json& j, ML& ml);
156 
157 } // namespace Model
158 } // namespace FT
159 
160 
161 #endif
class that specifies the machine learning algorithm to pair with Feat.
Definition: ml.h:80
float C
Definition: ml.h:146
int max_train_time
max seconds allowed for training
Definition: ml.h:143
Normalizer N
normalization
Definition: ml.h:142
shared_ptr< sh::CMachine > p_est
pointer to the ML object
Definition: ml.h:137
sh::EProblemType prob_type
type of learning problem; binary, multiclass or regression
Definition: ml.h:140
string ml_str
user specified ML type (string)
Definition: ml.h:139
bool normalize
control whether ML normalizes its input before training
Definition: ml.h:144
std::map< string, ML_TYPE > ml_hash
Definition: ml.h:92
vector< char > dtypes
Definition: ml.h:149
ML_TYPE ml_type
user specified ML type
Definition: ml.h:138
ML_TYPE
Definition: ml.h:65
@ L1_LR
Definition: ml.h:72
@ LARS
Definition: ml.h:66
@ SVM
Definition: ml.h:69
@ RF
Definition: ml.h:68
@ Ridge
Definition: ml.h:67
@ LR
Definition: ml.h:71
@ CART
Definition: ml.h:70
map< ML_TYPE, float > C_DEFAULT
Definition: ml.cc:14
main Feat namespace
Definition: data.cc:13
void from_json(const nl::json &, Feat &)
void to_json(nl::json &, const Feat &)
holds the hyperparameters for Feat.
Definition: params.h:25
normalizes a matrix to zero mean and unit variance.
Definition: utils.h:147