13 using namespace Pop::Op;
// Fragment (enclosing function header is not visible in this listing): builds
// the default list of operator names and passes it to set_functions().
// NOTE(review): the leading numbers (67, 68, ...) look like line-number residue
// from a source-browser export, and the closing `};` of the initializer list is
// missing from this extraction.
67 vector<string> default_fns {
// arithmetic and elementary math operators
68 "+",
"-",
"*",
"/",
"^2",
"^3",
"sqrt",
"sin",
"cos",
"exp",
"log",
"^",
// presumably activation-style functions -- TODO confirm against the node definitions
69 "logit",
"tanh",
"gauss",
"relu",
// presumably type conversions, then boolean / comparison / conditional operators -- TODO confirm
71 "b2f",
"c2f",
"and",
"or",
"not",
"xor",
"=",
"<",
"<=",
">",
">=",
"if",
"ite"
// install the defaults as the active function set
73 set_functions(default_fns);
// Fragment, apparently from init(const MatrixXf &X, const VectorXf &y) -- TODO
// confirm. The statements guarded by each condition below are not visible in
// this listing, so only the conditions themselves are documented.
// Special case: the "LinearRidgeRegression" learner requested together with classification.
84 if (ml ==
"LinearRidgeRegression" && classification)
// classification-only setup (body not shown)
89 if (this->classification)
// no data types recorded yet (body not shown; presumably derived from X -- TODO confirm)
94 if (this->dtypes.size()==0)
// at verbosity 2 or higher, print the recorded data types to stdout
96 if (this->verbosity >= 2)
98 cout <<
"X data types: ";
99 for (
auto dt : this->dtypes)
// empty scorer name with initialized=true
105 this->set_scorer(
"",
true);
// Fragment, apparently from set_scorer(string sc="", bool initialized=false)
// -- TODO confirm. Picks a scorer name from the problem type and learner, then
// logs when the name changed. Several branches are missing from this listing.
// Remember the previous scorer so a change can be detected at the end.
114 string tmp = this->scorer_;
// NOTE(review): this tests `scorer` while every assignment below writes
// `scorer_`; confirm these are intentionally different members.
117 if (this->scorer.empty() && initialized)
// binary classification
119 if (classification && n_classes == 2)
// NOTE(review): std::string::compare returns 0 when the strings are EQUAL, so
// `ml.compare("LR") || ml.compare("SVM")` is true whenever ml differs from at
// least one of the two names -- i.e. always. The intent was probably
// `ml == "LR" || ml == "SVM"`; as written the alternative branch looks unreachable.
121 if (ml.compare(
"LR") || ml.compare(
"SVM"))
124 scorer_ =
"zero_one";
// multiclass classification
126 else if (classification){
// NOTE(review): same compare() issue as above.
127 if (ml.compare(
"LR") || ml.compare(
"SVM"))
128 scorer_ =
"multi_log";
130 scorer_ =
"bal_zero_one";
// report the change at log level 2
139 if (tmp != this->scorer_)
140 logger.
log(
"scorer changed to " + scorer_,2);
// Fragment, apparently from set_term_weights(const vector<float> &w) -- TODO
// confirm. Rebuilds term_weights either uniformly or as a blend of the uniform
// weight and normalized weights derived from w. Parts of the body are missing.
// Uniform weight: one share per terminal.
// NOTE(review): if terminals is empty this divides by zero (float division, so
// u becomes inf rather than trapping); no guard is visible here.
146 float u = 1.0/float(terminals.size());
147 term_weights.clear();
// uniform fill (the condition selecting this path is not shown)
150 for (
unsigned i = 0;
i < terminals.size(); ++
i)
151 term_weights.push_back(u);
// work on a local copy so the caller's vector is not modified
156 vector<float> aw = w;
// fraction of terminals that were given a weight
157 float weighted_proportion = float(w.size())/float(terminals.size());
// first pass over aw (body not shown; presumably accumulates `sum` -- TODO confirm)
159 for (
unsigned i = 0;
i < aw.size(); ++
i)
// second pass: divide each weight by sum and scale by weighted_proportion
// NOTE(review): `sum` is defined in lines not shown; a zero sum would yield inf/NaN here.
165 for (
unsigned i = 0;
i < aw.size(); ++
i)
167 aw[
i] = aw[
i]/sum*weighted_proportion;
// size mismatch between supplied weights and terminals (handling not fully shown)
170 if (aw.size() != terminals.size())
172 "weights and " +
to_string(terminals.size()) +
175 for (
unsigned i = 0;
i < terminals.size(); ++
i)
// blend: (1-feedback) of the uniform weight plus feedback of the supplied weight;
// the index `x` is computed in lines not shown
181 term_weights.push_back((1-feedback)*u + feedback*aw[x]);
// only build the weights message when there are fewer than 20 terminals
187 if (terminals.size() < 20)
189 string weights =
"terminal weights: ";
190 for (
unsigned i = 0;
i < terminals.size(); ++
i)
193 + terminals.at(
i)->variable_name
195 terminals.at(
i)->otype +
"): " +
// Fragment, apparently the body of updateSize() -- TODO confirm.
// 2^(max_depth+1)-1 is the node count of a full binary tree of depth max_depth;
// multiplied by max_dim, one such tree per dimension.
// NOTE(review): pow() computes in floating point and the result is converted on
// assignment; an integer shift would avoid the round trip.
206 max_size = (pow(2,max_depth+1)-1)*max_dim;
// Fragment, apparently from set_max_depth(unsigned int max_depth) -- TODO confirm.
// `this->` is required because the parameter has the same name as the member.
212 this->max_depth = max_depth;
// Fragment, apparently from set_max_dim(unsigned int max_dim) -- TODO confirm.
// `this->` is required because the parameter has the same name as the member.
219 this->max_dim = max_dim;
// Fragment, apparently from set_otypes(bool terminals_set=false) -- TODO confirm.
// Collects the distinct terminal output types into ttypes, then fills otypes
// with 'b' and/or 'f'. Several branches are missing from this listing.
// Gather each terminal output type once.
229 for (
const auto& t: terminals)
231 if (!
in(ttypes,t->otype))
232 ttypes.push_back(t->otype);
// explicit output type requested (the switch header is not shown)
243 case 'b': otypes.push_back(
'b');
break;
244 case 'f': otypes.push_back(
'f');
break;
// all terminals are boolean
249 if (ttypes.size()==1 && ttypes.at(0)==
'b')
// NOTE(review): the message says "otypes" but the condition above tests ttypes;
// the log text probably should read "ttypes".
251 logger.
log(
string(
"otypes is size 1 and otypes[0]==b\n")
252 +
string(
"setting otypes to boolean...\n"),
264 otypes.push_back(
'b');
// all terminals are floating point
268 else if (ttypes.size()==1 && ttypes.at(0)==
'f')
// counter for operators that qualify as float-only (increment is not shown)
270 int only_floating_ops=0;
272 for (
const auto& op : functions)
// float-only: every argument is 'f' and the output is 'f'
274 if (op->arity[
'f']==op->total_arity() && op->otype==
'f')
// NOTE(review): compares an int with functions.size() (unsigned);
// harmless while the count is non-negative but triggers -Wsign-compare.
277 if (only_floating_ops == functions.size())
// NOTE(review): the two strings are concatenated without a separator, so the
// log reads "...are floatsetting otype='f'...".
279 logger.
log(
string(
"all terminal and function types are float")
280 +
string(
"setting otype='f'...\n"),
283 otypes.push_back(
'f');
// otherwise allow both output types (the two pairs below belong to separate
// branches whose conditions are not shown)
287 otypes.push_back(
'b');
288 otypes.push_back(
'f');
293 otypes.push_back(
'b');
294 otypes.push_back(
'f');
// Fragment, apparently from createNode(std::string str, float d_val, bool b_val,
// size_t loc, string name) -- TODO confirm. Only the branch conditions and the
// start of each return statement survive; the constructed node types are not shown.
// No data types recorded.
323 if(dtypes.size() == 0)
// no feature names either (returned node not shown)
325 if (feature_names.size() == 0)
328 return std::unique_ptr<Node>(
// feature_names.at(loc) throws std::out_of_range if loc is past the end
330 feature_names.at(
loc)));
// data types recorded but no feature names
332 else if (feature_names.size() == 0)
337 return std::unique_ptr<Node>(
340 return std::unique_ptr<Node>(
343 return std::unique_ptr<Node>(
// data types and feature names both available
353 return std::unique_ptr<Node>(
355 feature_names.at(
loc)));
357 return std::unique_ptr<Node>(
359 feature_names.at(
loc)));
362 return std::unique_ptr<Node>(
364 feature_names.at(
loc)));
// dump node_map contents to stdout (presumably on the unknown-name path -- TODO confirm)
// NOTE(review): key and node name are printed with no separator between them.
375 cout <<
"NM.node_map = \n";
378 cout << it->first << it->second->name << endl;
// empty (null) pointer
383 return std::unique_ptr<Node>();
// Fragment, apparently from set_protected_groups(string fn) -- TODO confirm.
// Splits a delimited string and stores one flag per token.
389 protected_groups.clear();
// Consume tokens from the front of pg. Only tokens FOLLOWED by a delimiter are
// handled here, so the final token is kept only if a trailing delimiter was
// appended in lines not shown -- TODO confirm.
396 while ((pos = pg.find(delim)) != string::npos)
398 token = pg.substr(0, pos);
// any token other than "0" counts as true
399 protected_groups.push_back(token !=
"0");
400 pg.erase(0, pos + delim.length());
402 string msg =
"protected_groups: ";
// NOTE(review): the loop variable `pg` shadows the string `pg` parsed above.
403 for (
auto pg : protected_groups)
// Fragment, apparently from get_protected_groups() (declared to return string)
// -- TODO confirm. Joins the stored flags into `out`, separated by a delimiter
// appended in a line not shown.
// NOTE(review): `int i` is compared against an unsigned size(); prefer size_t.
412 for (
int i = 0;
i < protected_groups.size(); ++
i)
// NOTE(review): the stored values come from `token != "0"`, i.e. bools. If
// `out` is a std::string (its declaration is not shown), `+=` converts the bool
// to a char with value 0 or 1 rather than appending the text "0"/"1".
414 out += protected_groups.at(
i);
// no separator after the last element
415 if (
i < protected_groups.size() - 1)
// Fragment, apparently from set_feature_names(string fn) -- TODO confirm.
// Splits fn on delim and stores each token as a feature name.
424 feature_names.clear();
// Only tokens followed by a delimiter are consumed; the last name is kept only
// if a trailing delimiter was appended in lines not shown -- TODO confirm.
431 while ((pos = fn.find(delim)) != string::npos)
433 token = fn.substr(0, pos);
434 feature_names.push_back(token);
// drop the consumed token and its delimiter from the front
435 fn.erase(0, pos + delim.length());
// Fragment, apparently the body of get_feature_names() -- TODO confirm.
// Returns the feature names joined by ravel(); the separator argument is left
// to ravel's default, which is not visible in this listing.
441 return ravel(this->feature_names);
// Fragment, apparently from get_functions() -- TODO confirm.
// Collects the name of every node currently in `functions`, in order.
494 vector<string> fn_vec;
495 for (
const auto& fn : this->functions)
496 fn_vec.push_back(fn->name);
// Fragment, apparently from set_functions(const vector<string> &fns) -- TODO confirm.
// Replaces the current function set with one node per requested name.
502 this->functions.clear();
503 for (
const auto& f : fns)
// NOTE(review): createNode has a path returning an empty unique_ptr; whether
// such a result is filtered before use is not visible here.
504 functions.push_back(createNode(f));
// Fragment, apparently from set_op_weights() -- TODO confirm. Most loop bodies
// are missing; the visible skeleton shows two near-identical passes over
// `functions`, each ending by dividing an operator's weight by its argument count.
// NOTE(review): the two passes share the same structure and look like
// candidates for a shared helper.
// pass over terminals (body not shown)
526 for (
const auto& term : terminals)
// first pass over operators
553 for (
const auto& op : functions)
// start each operator at weight zero
555 op_weights.push_back(0.0);
// one entry per argument type; kv.second is the number of arguments of that type
557 for (
auto& kv : op->arity)
562 for (
unsigned j = 0; j < kv.second; ++j)
566 for (
unsigned j = 0; j < kv.second; ++j)
570 for (
unsigned j = 0; j < kv.second; ++j)
574 for (
unsigned j = 0; j < kv.second; ++j)
578 total_args += kv.second;
// average over the argument count
// NOTE(review): `i` is not the range-for variable, so it must be maintained in
// lines not shown; total_args == 0 would divide by zero (float) -- confirm
// every operator has at least one argument.
580 op_weights.at(
i) /= float(total_args);
// act on operators with positive weight, switching on their output type (cases not shown)
593 for (
unsigned i = 0;
i < functions.size(); ++
i)
595 if (op_weights.at(
i) > 0)
597 switch (functions.at(
i)->otype)
// second pass over operators, same shape as the first
622 for (
const auto& op : functions)
625 for (
auto& kv : op->arity)
630 for (
unsigned j = 0; j < kv.second; ++j)
634 for (
unsigned j = 0; j < kv.second; ++j)
638 for (
unsigned j = 0; j < kv.second; ++j)
642 for (
unsigned j = 0; j < kv.second; ++j)
646 total_args += kv.second;
648 op_weights.at(
i) /= float(total_args);
// Fragment, apparently from set_terminals(int nf, const LongData &Z) -- TODO
// confirm. Creates the terminal nodes and then resets the terminal weights.
// One "x" terminal per feature; the last argument is createNode's `loc`.
// NOTE(review): size_t i is compared with nf, declared int in the signature
// index; a negative nf would convert to a huge unsigned bound.
663 for (
size_t i = 0;
i < nf; ++
i)
664 terminals.push_back(createNode(
string(
"x"), 0, 0,
i));
// constant terminals (the guarding conditions are not shown)
668 for (
int i = 0;
i < nf; ++
i)
// "kb": r() is passed in the b_val position
671 terminals.push_back(createNode(
string(
"kb"), 0,
r(), 0));
// "kd": r() is passed in the d_val position
673 terminals.push_back(createNode(
string(
"kd"),
r(), 0, 0));
// one "z" terminal per entry of Z, named by the map key
677 for (
const auto &val : Z)
679 longitudinalMap.push_back(val.first);
680 terminals.push_back(createNode(
string(
"z"), 0, 0, 0, val.first));
// reset terminal weights by passing an empty weight vector
688 this->set_term_weights(vector<float>());
// Fragment, apparently from set_verbosity(int verbosity) -- TODO confirm.
// `this->` is required because the parameter has the same name as the member.
715 this->verbosity = verbosity;
// Fragment, apparently from set_classes(const VectorXf &y) -- TODO confirm.
// Derives the class labels from y, checks that they are the integers
// 0, 1, 2, ... and records n_classes and classes. Error-raising statements are
// only partly visible.
// distinct label values
723 vector<float> uc =
unique(y);
// printable label list for the error messages (the loop filling it is not shown)
725 string str_classes =
"{";
// drop the last character (presumably a trailing separator -- TODO confirm)
728 str_classes = str_classes.substr(0,str_classes.size()-1);
// labels must start at 0
// NOTE(review): uc.at(0) throws std::out_of_range if there are no labels at all.
732 if (
int(uc.at(0)) != 0)
734 "contiguous. The input classes are " + str_classes);
// expected labels: 0, 1, ..., uc.size()-1
735 vector<int> cont_classes(uc.size());
736 iota(cont_classes.begin(), cont_classes.end(), 0);
// NOTE(review): `int i` is compared against an unsigned size(); prefer size_t.
737 for (
int i = 0;
i < cont_classes.size(); ++
i)
// element-wise comparison; this relies on unique() returning the labels in
// ascending order -- TODO confirm
739 if (
int(uc.at(
i)) != cont_classes.at(
i))
741 "contiguous. Passed labels = " + str_classes);
743 n_classes = uc.size();
// store labels as ints (the enclosing loop is not shown)
746 classes.push_back(
int(c));
// Fragment, apparently from set_sample_weights(VectorXf &y) -- TODO confirm.
// Computes one weight per class from its frequency in y, then one weight per
// sample by looking up the weight of that sample's class.
752 class_weights.resize(n_classes);
753 sample_weights.clear();
754 for (
unsigned i = 0;
i < n_classes; ++
i){
// step 1: fraction of samples whose label equals classes[i]
// NOTE(review): y.cast<int>() is re-evaluated on every iteration although it
// does not depend on i; it could be hoisted out of the loop.
755 class_weights.at(
i) = float(
756 (y.cast<
int>().array() ==
int(classes.at(
i))).count())/y.size();
// step 2: (1 - fraction) * n_classes, so less frequent classes get larger weights
757 class_weights.at(
i) = (1 - class_weights.at(
i))*
float(n_classes);
// NOTE(review): `unsigned i` is compared with y.size(), a signed index type in Eigen.
759 for (
unsigned i = 0;
i < y.size(); ++
i)
// the label value itself is used as the index, so labels must be 0..n_classes-1;
// .at() throws std::out_of_range otherwise
761 sample_weights.push_back(class_weights.at(
int(y(
i))));
void set_log_level(int &verbosity)
string log(string m, int v, string sep="\n") const
print message with verbosity control.
std::map< string, std::pair< vector< ArrayXf >, vector< ArrayXf > > > LongData
#define THROW_LENGTH_ERROR(err)
#define THROW_INVALID_ARGUMENT(err)
std::string ravel(const vector< string > &v, string sep)
takes a vector of strings and returns them joined into a single delimited string.
vector< char > find_dtypes(const MatrixXf &X)
determines data types of columns of matrix X.
bool in(const vector< T > v, const T &i)
checks whether an element is in a vector.
vector< T > unique(vector< T > w)
returns the unique elements of a vector.
std::string to_string(const T &value)
template function to convert objects to string for logging
string get_protected_groups()
void set_terminals(int nf, const LongData &Z)
set the terminals with longitudinal data
void set_sample_weights(VectorXf &y)
sets the weights of each sample (and class weights)
vector< string > get_functions()
returns the set of functions to use determined at run-time.
void set_current_gen(int g)
sets current generation
std::unique_ptr< Node > createNode(std::string str, float d_val=0, bool b_val=false, size_t loc=0, string name="")
returns a unique pointer to a node created from the name string passed.
void set_op_weights()
sets weights for operators.
void set_feature_names(string fn)
void set_functions(const vector< string > &fns)
sets available functions and verifies output types.
void init(const MatrixXf &X, const VectorXf &y)
void set_max_dim(unsigned int max_dim)
set maximum dimensionality of programs
string get_feature_names()
void set_max_depth(unsigned int max_depth)
set max depth of programs
void set_verbosity(int verbosity)
set level of debug info
void set_otypes(bool terminals_set=false)
set the output types of programs
void updateSize()
sets max_size to the node count of max_dim full binary trees of depth max_depth, i.e. (2^(max_depth+1) - 1) * max_dim.
void set_protected_groups(string fn)
void set_term_weights(const vector< float > &w)
sets weights for terminals.
void set_objectives(const vector< string > &obj)
set the objectives
void set_scorer(string sc="", bool initialized=false)
sets scorer type
void set_classes(const VectorXf &y)
sets the number of classes based on target vector y.
std::map< std::string, Node * > node_map