Feat C++ API
A feature engineering automation tool
auto_backprop.cc

#include "auto_backprop.h"

namespace FT {

namespace Opt{
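
    // Constructor: looks up the cost function (cost_func) and its derivative
    // (d_cost_func) by scorer name, and stores the backprop hyperparameters:
    // iters (number of iterations), n (starting learning rate, annealed down
    // toward epT = 0.01*n), and a (passed through to NodeDx::update along
    // with the learning rate).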
    AutoBackProp::AutoBackProp(string scorer, int iters, float n, float a)
    {
        /* this->program = program.get_data(); */
        score_hash["mse"] = &Eval::squared_difference;
        score_hash["log"] = &Eval::log_loss;
        score_hash["multi_log"] = &Eval::multi_log_loss;
        score_hash["fpr"] = &Eval::log_loss;
        score_hash["zero_one"] = &Eval::log_loss;

        d_score_hash["mse"] = &Eval::d_squared_difference;
        d_score_hash["log"] = &Eval::d_log_loss;
        d_score_hash["multi_log"] = &Eval::d_multi_log_loss;
        d_score_hash["fpr"] = &Eval::d_log_loss;
        d_score_hash["zero_one"] = &Eval::d_log_loss;

        this->d_cost_func = d_score_hash.at(scorer);
        this->cost_func = score_hash.at(scorer);

        /* this->X = X; */
        /* this->labels = labels; */
        this->iters = iters;
        this->n = n;
        this->epT = 0.01*this->n; // min learning rate
        this->a = a;
    }

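    // prints a program as a sequence of "( name, w0, w1, ... )" tokens,
    // including the current weights of differentiable (NodeDx) nodes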
    void AutoBackProp::print_weights(NodeVector& program)
    {
        for (const auto& p : program)
        {
            cout << "( " << p->name;
            if (p->isNodeDx())
            {
                NodeDx* dNode = dynamic_cast<NodeDx*>(p.get());
                for (int i = 0; i < dNode->arity.at('f'); i++)
                {
                    cout << "," << dNode->W.at(i);
                }
                dNode = nullptr;
            }
            cout << " ) ";
        }
        /* cout << "\n"; */
    }

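    // runs backprop on ind's program: the data is split 50/50 into training
    // and validation folds; each iteration draws a training batch, evaluates
    // the program (forward pass), fits the ML model to its output Phi,
    // backpropagates the cost gradient through each differentiable root
    // subtree, and checks validation loss for early stopping; the weights
    // with the best validation loss are restored at the end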
    void AutoBackProp::run(Individual& ind, const Data& d,
            const Parameters& params)
    {
        vector<size_t> roots = ind.program.roots();
        float min_loss;
        float current_loss, current_val_loss;
        vector<vector<float>> best_weights;
        // split up the data so we have a validation set
        DataRef BP_data(d.X, d.y, d.Z, d.classification);
        BP_data.train_test_split(true, 0.5);
        // set up batch data
        MatrixXf Xb, Xb_v;
        VectorXf yb, yb_v;
        LongData Zb, Zb_v;
        /* cout << "y: " << d.y.transpose() << "\n"; */
        Data batch_data(Xb, yb, Zb, params.classification);
        /* Data db_val(Xb_v, yb_v, Zb_v, params.classification); */
        /* db_val.set_validation(); // make this a validation set */
        // if batch size is 0, set batch to 20% of the training data
        int batch_size = params.bp.batch_size > 0 ?
            params.bp.batch_size : 0.2*BP_data.t->y.size();
        /* d.get_batch(db_val, ); // draw a batch for the validation data */
        // number of iterations to allow validation fitness to not improve
        int patience = 3;
        int missteps = 0;

        this->epk = n; // starting learning rate
        /* logger.log("running backprop on " + ind.get_eqn(), 2); */
        logger.log("=========================", 4);
        logger.log("Iteration,Train Loss,Val Loss,Weights", 4);
        logger.log("=========================", 4);
        for (int x = 0; x < this->iters; x++)
        {
            logger.log("get batch", 3);
            // get batch data for training
            BP_data.t->get_batch(batch_data, batch_size);
            /* cout << "batch_data.y: " << batch_data.y.transpose() << "\n"; */
            // evaluate forward pass
            MatrixXf Phi;
            logger.log("forward pass", 3);
            vector<Trace> stack_trace = forward_prop(ind, batch_data,
                    Phi, params);
            // evaluate ML model on Phi
            bool pass = true;
            auto ml = std::make_shared<ML>(params.ml, true,
                    params.classification, params.n_classes);

            logger.log("ml fit", 3);
            shared_ptr<CLabels> yhat = ml->fit(Phi,
                    batch_data.y, params, pass, ind.dtypes);

            if (!pass || stack_trace.size() == 0)
                break;

            vector<float> Beta = ml->get_weights();
            current_loss = this->cost_func(batch_data.y, yhat,
                    params.class_weights).mean();

            // evaluate backward pass
            size_t s = 0;
            for (unsigned i = 0; i < stack_trace.size(); ++i)
            {
                while (!ind.program.at(roots.at(s))->isNodeDx()) ++s;
                /* cout << "running backprop on " << ind.get_eqn()
                 * << " from " << roots.at(s) << " to "
                 * << ind.program.subtree(roots.at(s)) << "\n"; */
                backprop(stack_trace.at(i), ind.program,
                        ind.program.subtree(roots.at(s)),
                        roots.at(s), Beta.at(s), // /ml->N.scale.at(s),
                        yhat, batch_data, params.class_weights);
            }

            // check validation fitness for early stopping
            MatrixXf Phival = ind.out((*BP_data.v));
            logger.log("checking validation fitness", 3);
            /* cout << "Phival: " << Phival.rows() << " x " << Phival.cols() << "\n"; */
            shared_ptr<CLabels> y_val = ml->predict(Phival);
            current_val_loss = this->cost_func(BP_data.v->y, y_val,
                    params.class_weights).mean();
            if (x == 0 || current_val_loss < min_loss)
            {
                min_loss = current_val_loss;
                best_weights = ind.program.get_weights();
            }
            else
            {
                ++missteps;
                /* cout << "missteps: " << missteps << "\n"; */
                logger.log("", 3);
            }
            // early stopping trigger
            if (missteps == patience
                    || std::isnan(min_loss)
                    || std::isinf(min_loss)
                    || min_loss <= NEAR_ZERO)
                break;
            else
                logger.log("min loss: " + std::to_string(min_loss), 3);

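            // anneal the learning rate: blend epk toward the floor epT, with
            // the pull toward epT growing as x approaches iters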
            float alpha = float(x)/float(iters);
            this->epk = (1 - alpha)*this->epk + alpha*this->epT;
            /* this->epk = this->epk + this->epT; */
            /* cout << "epk: " << this->epk << "\n"; */
            if (params.verbosity > 3)
            {
                cout << x << ","
                     << current_loss << ","
                     << current_val_loss << ",";
                print_weights(ind.program);
            }
        }
        logger.log("", 4);
        logger.log("=========================", 4);
        logger.log("done=====================", 4);
        logger.log("=========================", 4);
        ind.program.set_weights(best_weights);
    }

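    // forward pass: evaluate the program on the batch data, filling Phi with
    // the program's output matrix and recording a Trace of intermediate node
    // outputs for each root subtree so the backward pass can reuse them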
    vector<Trace> AutoBackProp::forward_prop(Individual& ind, const Data& d,
            MatrixXf& Phi, const Parameters& params)
    {
        // iterate through all the nodes, evaluating and tracking outputs
        vector<Trace> stack_trace;
        Phi = ind.out_trace(d, stack_trace);
        // use stack_f and the execution stack to avoid the issue of branches
        // affecting which elements appear before a node
        return stack_trace;
    }

    // Updates stacks to have proper value on top.
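    // After a subtree is exhausted, this pops finished parents off `executing`
    // until one with an unexplored child branch is found, then re-queues that
    // parent on bp_program and pushes its next stored derivative onto the
    // gradient stack.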
    void AutoBackProp::next_branch(vector<BP_NODE>& executing, vector<Node*>& bp_program,
            vector<ArrayXf>& derivatives)
    {
        // while there are still nodes with branches to explore
        if (!executing.empty())
        {
            // declare variable to hold a node and its associated derivatives
            BP_NODE bp_node = pop<BP_NODE>(&executing); // check first element
            // loop until a branch to explore is found
            while (bp_node.deriv_list.empty() && !executing.empty())
            {
                bp_node = pop<BP_NODE>(&executing); // get node and its derivatives
                // for some reason this function is not removing the element from the stack
                pop<ArrayXf>(&derivatives); // remove associated gradients from stack
                if (executing.empty())
                    return;
            }

            // should now have the next parent node and its derivatives (stored in bp_node)
            if (!bp_node.deriv_list.empty())
            {
                bp_program.push_back(bp_node.n);
                // pull the derivative from the front of the list, due to how we stored them earlier
                derivatives.push_back(pop_front<ArrayXf>(&(bp_node.deriv_list)));
                // push it back on the stack in order to sync all the stacks
                executing.push_back(bp_node);
            }
        }
    }

    // Compute gradients and update weights.
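    // The gradient is seeded with d(cost)/d(yhat) scaled by Beta (the weight
    // the ML model assigned to this subtree's output), then chained down the
    // tree: each differentiable node computes derivatives wrt its arguments
    // and applies a weight update at the current learning rate epk.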
    void AutoBackProp::backprop(Trace& stack, NodeVector& program, int start, int end,
            float Beta, shared_ptr<CLabels>& yhat,
            const Data& d,
            vector<float> sw)
    {
        vector<ArrayXf> derivatives;
        // start with the derivative of the cost function wrt the ML output,
        // times dyhat/d(program output), which is equal to the weight the
        // model assigned to this subprogram (Beta)
        derivatives.push_back(this->d_cost_func(d.y, yhat, sw).array() * Beta);
        /* cout << "Cost derivative: " << derivatives[derivatives.size()-1] << "\n"; */
        vector<BP_NODE> executing; // stores nodes and their associated derivatives
        // Currently I don't think updates will be saved; might want a pointer
        // to nodes so we don't have to restock the list.
        // program we loop through and edit during the algorithm
        vector<Node*> bp_program = program.get_data(start, end);
        while (bp_program.size() > 0)
        {
            Node* node = pop<Node*>(&bp_program);
            /* cout << "Evaluating: " << node->name << "\n"; */
            vector<ArrayXf> n_derivatives;

            if (node->isNodeDx() && node->visits == 0 && node->arity.at('f') > 0)
            {
                // could probably put this up one level and have the if condition check for null
                NodeDx* dNode = dynamic_cast<NodeDx*>(node);
                // calculate all the derivatives and store them, then update
                // all the weights and throw away the node
                for (int i = 0; i < node->arity.at('f'); i++)
                {
                    dNode->derivative(n_derivatives, stack, i);
                }
                dNode->update(derivatives, stack, this->epk, this->a);
                // get rid of the input arguments for the node
                for (int i = 0; i < dNode->arity.at('f'); i++)
                    pop<ArrayXf>(&stack.f);
                for (int i = 0; i < dNode->arity.at('b'); i++)
                    pop<ArrayXb>(&stack.b);
                for (int i = 0; i < dNode->arity.at('c'); i++)
                    pop<ArrayXi>(&stack.c);
                if (!n_derivatives.empty())
                    derivatives.push_back(pop_front<ArrayXf>(&n_derivatives));

                executing.push_back({dNode, n_derivatives});
            }
            // choosing how to move through the tree
            if (node->arity.at('f') == 0 || !node->isNodeDx())
            {
                // clean up gradients and find the parent node
                if (!derivatives.empty())
                    pop<ArrayXf>(&derivatives); // TODO: check if this fixed it
                next_branch(executing, bp_program, derivatives);
            }
            else
            {
                node->visits += 1;
                if (node->visits > node->arity.at('f'))
                    next_branch(executing, bp_program, derivatives);
            }
        }

        // point bp_program entries to null
        for (unsigned i = 0; i < bp_program.size(); ++i)
            bp_program.at(i) = nullptr;

        /* cout << "Backprop terminated\n"; */
        /* print_weights(program); */
    }
}
}

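/* Usage sketch (illustrative; `ind`, `d`, and `params` are assumed to be an
 * Individual, a Data object, and a Parameters instance configured elsewhere).
 * The scorer key must be one of the entries registered in score_hash:
 *
 *     FT::Opt::AutoBackProp bp("mse", 1000, 0.1, 0.9);
 *     bp.run(ind, d, params);   // adapts the weights of ind's program
 */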