Feat C++ API
A feature engineering automation tool
params.h
Go to the documentation of this file.
1 /* FEAT
2 copyright 2017 William La Cava
3 license: GNU/GPL v3
4 */
5 #ifndef PARAMS_H
6 #define PARAMS_H
7 // internal includes
8 #include "pop/nodewrapper.h"
9 #include "pop/nodevector.h"
10 #include "util/logger.h"
11 #include "util/utils.h"
12 #include "pop/nodemap.h"
13 
14 namespace FT{
15 
16 using namespace Pop;
17 using namespace Op;
18 
20 
/// Holds the hyperparameters for Feat.
// NOTE(review): this listing carries the documentation page's own line
// numbers and skips some of them (e.g. 27, 30, 42, 43). Other parts of this
// page reference members random_state (params.h:27), current_gen
// (params.h:30), functions (params.h:42) and terminals (params.h:43) that are
// not shown below - confirm against the real header.
24 struct Parameters
25 {
26  std::map<std::string, Node*> node_map; ///< map from string keys to raw Node pointers
28  int pop_size = 100;
29  int gens = 100;
31  string ml; ///< machine learner used with Feat
32  bool classification = false;
33  int max_stall = 0;
34  vector<char> otypes; ///< program output types ('f', 'b')
35  vector<char> ttypes; ///< program terminal types ('f', 'b')
36  char otype; ///< user parameter for output type setup
39  int verbosity = 0;
40  vector<float> term_weights; ///< probability weighting of terminals
41  vector<float> op_weights; ///< probability weighting of functions
45  vector<std::string> longitudinalMap; ///< presumably the vector storing longitudinal data keys - TODO confirm
46 
47  unsigned int max_depth = 3;
48  unsigned int max_size; ///< max size of programs (length)
49  unsigned int max_dim = 10;
50  bool erc = false;
51  unsigned num_features; ///< number of features
52  vector<string> objectives{"fitness","complexity"}; ///< defaults to {"fitness","complexity"}
53  bool shuffle = true;
54  float split = 0.75;
55  vector<char> dtypes; ///< data types of input parameters
56  float feedback = 0.5;
57  unsigned int n_classes; ///< number of classes for classification
58  float cross_rate; ///< cross rate for variation
59  vector<int> classes; ///< class labels
60  vector<float> class_weights; ///< weights for each class
61  vector<float> sample_weights; ///< weights for each sample
62  string scorer; ///< loss function argument
63  string scorer_; ///< actual loss function used, determined by scorer
64  vector<string> feature_names; ///< names of features
65  bool backprop = false;
66  bool hillclimb = false;
67  int max_time = -1;
68  bool use_batch = false;
69  bool residual_xo=false;
70  bool stagewise_xo=false;
71  bool stagewise_xo_tol=true; // NOTE(review): declared bool although the name suggests a tolerance - confirm intent
72  bool corr_delete_mutate=false;
73  float root_xo_rate = 0.5;
74  bool softmax_norm; ///< use softmax norm on probabilities
75  bool normalize; ///< whether to normalize the input data
76  vector<bool> protected_groups; ///< protected attributes in X
77  bool tune_initial; ///< tune initial ML model
78  bool tune_final; ///< tune final ML model
80  string fn_str; ///< presumably the comma-delimited operator names used to choose functions - TODO confirm
81  int n_jobs = 1;
82 
// Backprop settings; every field has a default, so BP() yields
// iters=10, learning_rate=0.1, batch_size=0.
83  struct BP
84  {
85  int iters=10;
86  float learning_rate = 0.1;
87  int batch_size = 0;
// Sets iters, learning_rate and batch_size from i, l and bs respectively.
88  BP(int i, float l, int bs): iters(i), learning_rate(l), batch_size(bs) {};
89  BP() = default;
90  };
91 
92  BP bp; ///< backprop parameters
93 
// Stochastic hill climbing settings.
// NOTE(review): iters and step have no default member initializer, so a
// default-initialized HC leaves them indeterminate unless they are assigned
// elsewhere - confirm that Parameters() (or a caller) sets hc before use.
94  struct HC
95  {
96  int iters;
97  float step;
// Sets iters and step from i and s respectively.
98  HC(int i, float s): iters(i), step(s) {};
99  HC() = default;
100  };
101 
102  HC hc; ///< stochastic hill climbing parameters
103 
// The block below is a commented-out declaration of a constructor taking the
// individual settings as arguments; only the no-argument constructor is
// declared in this listing.
104  // Parameters(int pop_size, int gens, string ml, bool classification,
105  // int max_stall, char ot, int verbosity, string fs, float cr,
106  // float root_xor, unsigned int max_depth, unsigned int max_dim,
107  // bool constant, string obj, bool sh, float sp, float fb,
108  // string sc, string fn, bool bckprp, int iters, float lr, int bs,
109  // bool hclimb, int maxt, bool res_xo, bool stg_xo,
110  // bool stg_xo_tol, bool sftmx, bool nrm, bool corr_mut,
111  // bool tune_init, bool tune_fin);
112 
// NOTE(review): otype, max_size, num_features, n_classes, cross_rate,
// softmax_norm, normalize, tune_initial and tune_final have no default member
// initializer above; presumably this constructor assigns them - confirm in
// the implementation file.
113  Parameters();
115 
121  void init(const MatrixXf& X, const VectorXf& y);
122 
124  void set_current_gen(int g);
125 
127  void set_scorer(string sc="", bool initialized=false);
128 
130  void set_term_weights(const vector<float>& w);
131 
134 
// Declared to return a uniquely-owned Node; all arguments except str are
// defaulted (d_val = 0, b_val = false, loc = 0, name = "").
136  std::unique_ptr<Node> createNode(std::string str, float d_val = 0, bool b_val = false,
137  size_t loc = 0, string name = "");
138 
140  vector<string> get_functions(); ///< returns the set of functions to use determined at run-time.
142  void set_functions(const vector<string>& fns); ///< sets available functions and verifies output types.
143 
145  void updateSize();
146 
148  void set_max_depth(unsigned int max_depth);
149 
151  void set_max_dim(unsigned int max_dim);
152 
154  void set_terminals(int nf, const LongData& Z);
// Convenience overload: forwards to set_terminals(nf, Z) with a
// default-constructed LongData.
155  void set_terminals(int nf){LongData Z; set_terminals(nf,Z); };
156 
157  void set_feature_names(string fn);
158  string get_feature_names();
159 
160  string get_protected_groups();
161  void set_protected_groups(string fn);
162 
// Returns the objectives member by value; the return type is deduced from it.
164  auto get_objectives(){ return objectives; };
166  void set_objectives(const vector<string>& obj);
167 
169  void set_verbosity(int verbosity);
170 
171  void set_otype(char ot);
172 
173  void set_ttypes();
174 
176  void set_otypes(bool terminals_set=false);
177 
179  void set_classes(const VectorXf& y);
180 
182  void set_sample_weights(VectorXf& y);
183 
186 };
187 
// NOTE(review): this is the tail of a parenthesised name list whose opening
// line (listing line 188) is not shown. Elsewhere on this page the
// NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE macro appears with a type followed by its
// member names, so this is presumably the same macro applied to Parameters -
// confirm against the real header.
// The names node_map, n_jobs, bp and hc from the struct listing do not appear
// in this list.
189  pop_size,
190  gens,
191  current_gen, // not declared in the visible struct listing; this page places it at params.h:30
192  ml,
193  classification,
194  max_stall,
195  otypes,
196  ttypes,
197  otype,
198  verbosity,
199  term_weights,
200  op_weights,
201  fn_str,
202  terminals, // not declared in the visible struct listing; this page places it at params.h:43
203  longitudinalMap,
204  max_depth,
205  max_size,
206  max_dim,
207  erc,
208  num_features,
209  objectives,
210  shuffle,
211  split,
212  dtypes,
213  feedback,
214  n_classes,
215  cross_rate,
216  classes,
217  class_weights,
218  sample_weights,
219  scorer,
220  scorer_,
221  feature_names,
222  backprop,
223  hillclimb,
224  max_time,
225  use_batch,
226  residual_xo,
227  stagewise_xo,
228  stagewise_xo_tol,
229  corr_delete_mutate,
230  root_xo_rate,
231  softmax_norm,
232  normalize,
233  protected_groups,
234  tune_initial,
235  tune_final
236  );
237 } // FT
238 #endif
std::map< string, std::pair< vector< ArrayXf >, vector< ArrayXf > > > LongData
Definition: data.h:23
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(NodeFuzzyFixedSplit< float >, name, otype, arity, complexity, visits, train, threshold, threshold_set) NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(NodeFuzzyFixedSplit< int >
main Feat namespace
Definition: data.cc:13
int i
Definition: params.cc:552
BP(int i, float l, int bs)
Definition: params.h:88
HC(int i, float s)
Definition: params.h:98
holds the hyperparameters for Feat.
Definition: params.h:25
bool softmax_norm
use softmax norm on probabilities
Definition: params.h:74
vector< string > get_functions()
returns the set of functions to use determined at run-time.
vector< char > dtypes
data types of input parameters
Definition: params.h:55
unsigned int max_size
max size of programs (length)
Definition: params.h:48
void set_op_weights()
sets weights for operators.
float cross_rate
cross rate for variation
Definition: params.h:58
vector< int > classes
class labels
Definition: params.h:59
vector< float > class_weights
weights for each class
Definition: params.h:60
vector< float > sample_weights
weights for each sample
Definition: params.h:61
unsigned int n_classes
number of classes for classification
Definition: params.h:57
void set_functions(const vector< string > &fns)
sets available functions and verifies output types.
string scorer
loss function argument
Definition: params.h:62
NodeVector functions
function nodes available in programs
Definition: params.h:42
string fn_str
Definition: params.h:80
string ml
machine learner used with Feat
Definition: params.h:31
vector< float > term_weights
probability weighting of terminals
Definition: params.h:40
HC hc
stochastic hill climbing parameters
Definition: params.h:102
unsigned num_features
number of features
Definition: params.h:51
void set_terminals(int nf)
Definition: params.h:155
bool tune_initial
tune initial ML model
Definition: params.h:77
int current_gen
holds current generation
Definition: params.h:30
BP bp
backprop parameters
Definition: params.h:92
bool normalize
whether to normalize the input data
Definition: params.h:75
void initialize_node_map()
defines a map of function names to their respective nodes.
vector< std::string > longitudinalMap
Definition: params.h:45
NodeVector terminals
terminal nodes available in programs. (Trailing fragment, apparently belonging to longitudinalMap: vector storing longitudinal data keys.)
Definition: params.h:43
vector< string > feature_names
names of features
Definition: params.h:64
vector< bool > protected_groups
protected attributes in X
Definition: params.h:76
std::map< std::string, Node * > node_map
Definition: params.h:26
vector< char > ttypes
program terminal types ('f', 'b')
Definition: params.h:35
vector< char > otypes
program output types ('f', 'b')
Definition: params.h:34
vector< float > op_weights
probability weighting of functions
Definition: params.h:41
char otype
user parameter for output type setup
Definition: params.h:36
auto get_objectives()
get objectives (the inline definition returns the objectives vector by value, not a comma-delimited string)
Definition: params.h:164
bool tune_final
tune final ML model. (Trailing fragment, apparently belonging to fn_str: string of comma-delimited operator names, used to choose functions.)
Definition: params.h:78
string scorer_
actual loss function used, determined by scorer
Definition: params.h:63
int random_state
random seed
Definition: params.h:27
an extension of a vector of unique pointers to nodes
Definition: nodevector.h:23