33 assert(v.size()>0 &&
" attemping to return random choice from empty vector");
34 std::vector<size_t> vi(v.size());
35 std::iota(vi.begin(), vi.end(), 0);
37 return v.at(idx)->clone();
52 unsigned start=
pop.size();
54 #pragma omp parallel for
55 for (
unsigned i = start;
i<
pop.size(); ++
i)
78 pass =
cross(mom, dad, child, params, d);
96 pass =
mutate(mom,child,params,d);
104 assert(child.
size()>0);
108 pop.individuals.at(
i) = child;
128 mom.
clone(child,
false);
130 if (rf < 1.0/3.0 && child.
get_dim() > 1)
141 logger.
log(
"\tcorrelation_delete_mutate",3);
143 child,mom.
Phi,params,d);
147 logger.
log(
"\tdelete_dimension_mutate",3);
154 else if (rf < 2.0/3.0 && child.
size() < params.
max_size)
181 float n = child.
size();
190 logger.
log(
"\t\tmutating node " + p->name, 3);
193 if (p->total_arity() > 0)
198 if (f->otype == p->otype &&
199 f->arity == p->arity)
204 replacements.push_back(f->rnd_clone());
214 if (t->otype == p->otype)
215 replacements.push_back(t->clone());
219 if (replacements.size() == 0)
221 WARN(
"WARNING: couldn't mutate " +
222 to_string(p->name)+
", no valid replacements found\n");
241 float n = child.
size();
246 for (
unsigned i = 0;
i< child.
program.size(); ++
i)
252 child.
program.at(
i)->name +
" with probability " +
263 if (f->otype==child.
program.at(
i)->otype
264 && f->arity.at(child.
program.at(
i)->otype) > 0)
268 map<char,unsigned> fn_arity = f->arity;
269 --fn_arity.at(child.
program.at(
i)->otype);
270 bool valid_function =
true;
271 for (
auto kv : fn_arity)
277 valid_function=
false;
283 fns.push_back(f->rnd_clone());
302 map<char, unsigned> insert_arity = insertion.back()->arity;
305 --insert_arity.at(child.
program.at(
i)->otype);
307 vector<char> type_order = {
'f',
'b',
'c',
'z'};
308 for (
auto type : type_order)
311 if (child.
program.at(
i)->otype==type)
314 for (
int k = end; k != start-1; --k)
318 insertion.push_back(child.
program.at(k)->clone());
323 for (
unsigned j = 0; j< insert_arity.at(type); ++j)
331 std::reverse(insertion.begin(),insertion.end());
334 for (
const auto& ins : insertion) s += ins->name +
" ";
335 logger.
log(
"\t\tinsertion: " + s +
"\n", 3);
339 insertion,
size_t(0), insertion.size()-1);
341 i += insertion.size()-1;
354 for (
const auto& ip : insertion)
355 child.
program.push_back(ip->clone());
369 for (
unsigned i = 0;
i< child.
program.size(); ++
i)
378 for (
int j=start; j<=end; ++j)
380 portion += child.
program.at(j)->name +
" ";
383 portion +
" ] from program " +
393 if ( t->otype==child.
program.at(
i)->otype )
395 terms.push_back(t->rnd_clone());
401 logger.
log(
"\t\tnevermind, couldn't find a matching terminal",
407 std::unique_ptr<Node> insertion =
random_node(terms);
410 logger.
log(
"\t\tinsertion: " + insertion->name +
"\n", 4);
416 for (
unsigned i = start;
i<=end; ++
i)
418 s+= child.
program.at(
i)->name +
" ";
429 logger.
log(
"\t\tresult of delete mutation: " +
453 for (
unsigned i = start;
i<=end; ++
i)
455 s+= child.
program.at(
i)->name +
" ";
480 for (
int i = 0;
i < Phi.rows(); ++
i)
482 Phi.row(
i) = Phi.row(
i).array() - Phi.row(
i).mean();
486 float highest_corr = 0;
488 for (
int i = 0;
i < Phi.rows()-1; ++
i)
490 for (
int j =
i+1; j < Phi.rows(); ++j)
498 if (corr > highest_corr)
507 +
"; corr = " +
to_string(highest_corr), 3);
508 if (f1 == 0 && f2 == 0)
514 Phi.row(f1).array());
516 Phi.row(f2).array());
519 int choice = corr_f1 <= corr_f2 ? f1 : f2;
523 size_t end = roots.at(choice);
528 for (
unsigned i = start;
i<=end; ++
i)
529 s+= child.
program.at(
i)->name +
" ";
536 logger.
log(
"\t\tresult of corr delete mutation: "
542 cout <<
"Error in correlation_delete_mutate: child is not a valid "
545 cout << child.
get_eqn() << endl;
548 return highest_corr > 0.999;
568 vector<size_t> mlocs, dlocs;
569 size_t i1, j1, i2, j2;
575 vector<char> d_otypes;
576 for (
const auto& p : dad.
program)
577 if(!
in(d_otypes,p->otype))
578 d_otypes.push_back(p->otype);
581 for (
size_t i =0;
i<mom.
size(); ++
i)
587 logger.
log(
"WARNING: no overlapping types between " +
595 for (
size_t i =0;
i<dad.
size(); ++
i)
612 logger.
log(
"\t\trandom choice mlocs (size "+
661 vector<size_t> mlocs, dlocs;
663 size_t i1, j1, j1_idx, i2, j2;
666 vector<int> mlocs_indices(mlocs.size());
667 std::iota(mlocs_indices.begin(),mlocs_indices.end(),0);
670 logger.
log(
"\t\trandom choice mlocs (size "+
675 j1 = mlocs.at(j1_idx);
684 VectorXf tree = (mom.
ml->get_weights().at(j1_idx) *
685 mom.
Phi.row(j1_idx).array());
687 VectorXf mom_pred_minus_tree = mom.
yhat - tree;
694 VectorXf mom_residual = d.
y - mom_pred_minus_tree;
699 vector<float> corrs(dad.
Phi.rows());
700 int best_corr_idx = 0;
701 float best_corr = -1;
704 for (
int i = 0;
i < dad.
Phi.rows(); ++
i)
707 dad.
Phi.row(
i).array());
710 if (corr > best_corr )
721 j2 = dlocs.at(best_corr_idx);
769 VectorXf R = d.
y.array() - d.
y.mean();
771 if (mom.
Phi.cols() != dad.
Phi.cols())
773 cout <<
"!!WARNING!! mom.Phi.cols() = " << mom.
Phi.cols()
774 <<
" and dad.Phi.cols() = " << dad.
Phi.cols() <<
"\n";
775 cout <<
" d.y size: " << d.
y.size() <<
"\n";
780 MatrixXf PhiA(mom.
Phi.rows()+dad.
Phi.rows(), mom.
Phi.cols());
787 for (
int i = 0;
i < PhiA.rows(); ++
i)
789 PhiA.row(
i) = PhiA.row(
i).array() - PhiA.row(
i).mean();
800 bool condition =
true;
803 float best_corr = -1;
807 for (
int i = 0;
i < PhiA.rows(); ++
i)
813 if (corr > best_corr)
825 float b = (
covariance(PhiA.row(best_corr_idx),R) /
830 R = R - b*PhiA.row(best_corr_idx).transpose();
831 deltaR = (deltaR - R.norm()) / deltaR;
837 sel_idx.push_back(best_corr_idx);
850 condition = (deltaR > tol
851 && nsel <= (mom.
Phi.rows() + dad.
Phi.rows()));
855 condition = nsel < mom.
Phi.rows() ;
861 vector<size_t> mlocs, dlocs;
869 for (
int idx : sel_idx)
873 if (idx < mom.
Phi.rows())
875 int stop = mlocs.at(idx);
882 for (
unsigned i = start;
i <= stop ; ++
i)
889 int stop = dlocs.at(idx - mom.
Phi.rows());
896 for (
unsigned i = start;
i <= stop ; ++
i)
944 cout <<
"i1 ( " << i1 <<
") >= v1 size (" << v1.size() <<
")\n";
946 cout <<
"i2 ( " << i2 <<
") >= v2 size (" << v2.size() <<
")\n";
948 cout <<
"j1+1 < 0 (j1 = " << j1 << endl;
950 cout <<
"j2+1 < 0 (j2 = " << j2 << endl;
955 for (
unsigned i = 0;
i < i1 ; ++
i)
956 vnew.push_back(v1.at(
i)->clone());
958 for (
unsigned i = i2;
i <= j2 ; ++
i)
959 vnew.push_back(v2.at(
i)->clone());
961 for (
unsigned i = j1+1;
i < v1.size() ; ++
i)
962 vnew.push_back(v1.at(
i)->clone());
964 catch (std::bad_alloc& ba)
966 std::cerr <<
"bad_alloc caught: " << ba.what() <<
"\n";
973 std::cout <<
"\t\tattempting the following crossover:\n\t\t";
977 std::cout << mom.
program.at(
i)->name <<
" ";
981 std::cout <<
"\n\t\t";
986 std::cout << dad.
program.at(
i)->name <<
" ";
990 std::cout <<
"\n\t\t";
993 std::cout <<
"child after cross: ";
994 for (
unsigned i = 0;
i< child.
program.size(); ++
i){
995 if (
i==i1) std::cout <<
"[";
996 std::cout << child.
program.at(
i)->name <<
" ";
997 if (
i==i1+j2-i2) std::cout <<
"]";
data holding X, y, and Z data
individual programs in the population
void set_parents(const vector< Individual > &parents)
set parent ids using parents
int size() const
return size of program
vector< float > p
probability of variation of subprograms
VectorXf yhat
current output
string get_eqn()
return symbolic representation of program
void clone(Individual &cpy, bool sameid=true) const
clone this individual
MatrixXf Phi
transformation output of program
NodeVector program
executable data structure
shared_ptr< ML > ml
ML model, trained on Phi.
string program_str() const
return program name list
vector< float > get_p() const
get probabilities of variation
unsigned int get_dim()
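grab sub-tree locations given starting point. (Note: this text appears to describe subtree(); get_dim() presumably returns the dimensionality of the program — confirm against the declaration.)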
string log(string m, int v, string sep="\n") const
print message with verbosity control.
T random_choice(const vector< T > &v)
float get_cross_rate()
return current cross rate
void print_cross(const Individual &, size_t, size_t, const Individual &, size_t, size_t, Individual &, bool after=true)
debugging printout of crossover operation.
bool cross(const Individual &mom, const Individual &dad, Individual &child, const Parameters &params, const Data &d)
crossover
void delete_mutate(Individual &child, const Parameters &params)
void splice_programs(NodeVector &vnew, const NodeVector &v1, size_t i1, size_t j1, const NodeVector &v2, size_t i2, size_t j2)
splice two programs together
void vary(Population &pop, const vector< size_t > &parents, const Parameters &params, const Data &d)
method to handle variation of population
bool correlation_delete_mutate(Individual &child, MatrixXf Phi, const Parameters &params, const Data &d)
bool mutate(const Individual &mom, Individual &child, const Parameters &params, const Data &d)
mutation
void set_cross_rate(float cr)
update cross rate
float cross_rate
fraction of crossover in total variation
void insert_mutate(Individual &child, const Parameters &params)
bool stagewise_cross(const Individual &mom, const Individual &dad, Individual &child, const Parameters &params, const Data &d)
stagewise crossover
void delete_dimension_mutate(Individual &child, const Parameters &params)
bool residual_cross(const Individual &mom, const Individual &dad, Individual &child, const Parameters &params, const Data &d)
residual crossover
void point_mutate(Individual &child, const Parameters &params)
float covariance(const ArrayXf &x, const ArrayXf &y)
covariance of x and y
float pearson_correlation(const ArrayXf &x, const ArrayXf &y)
the normalized covariance of x and y
bool in(const vector< T > v, const T &i)
check if element is in vector.
std::string to_string(const T &value)
template function to convert objects to string for logging
float variance(const ArrayXf &v, float mean)
calculate variance when mean provided
std::unique_ptr< Node > random_node(const NodeVector &v)
holds the hyperparameters for Feat.
vector< char > dtypes
data types of input parameters
unsigned int max_size
max size of programs (length)
unsigned int max_dim
maximum dimensionality of programs
bool stagewise_xo_tol
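use stagewise crossover with a tolerance (presumably selects the tolerance-based stopping condition in stagewise_cross — confirm)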
NodeVector functions
function nodes available in programs
bool stagewise_xo
use stagewise crossover
bool residual_xo
use residual crossover
int pop_size
population size
vector< float > term_weights
probability weighting of terminals
unsigned num_features
number of features
int current_gen
holds current generation
vector< std::string > longitudinalMap
float root_xo_rate
crossover
NodeVector terminals
terminal nodes available in programs. (The phrase "vector storing longitudinal data keys" was fused onto this description; it appears to describe longitudinalMap.)
vector< char > ttypes
program terminal types ('f', 'b')
vector< char > otypes
program output types ('f', 'b')
vector< float > op_weights
probability weighting of functions
bool corr_delete_mutate
use correlation delete mutation
an extension of a vector of unique pointers to nodes
vector< size_t > roots() const
returns indices of root nodes
void make_program(const NodeVector &functions, const NodeVector &terminals, int max_d, const vector< float > &term_weights, const vector< float > &op_weights, int dim, char otype, vector< string > longitudinalMap, const vector< char > &term_types)
bool is_valid_program(unsigned num_features, vector< string > longitudinalMap)
void make_tree(const NodeVector &functions, const NodeVector &terminals, int max_d, const vector< float > &term_weights, const vector< float > &op_weights, char otype, const vector< char > &term_types)
size_t subtree(size_t i, char otype='0', string indent="> ") const
Defines a population of programs and functions for constructing them.