Feat C++ API
A feature engineering automation tool
FT::Feat Class Reference

main class for the Feat learner. More...

#include <feat.h>

Collaboration diagram for FT::Feat:

Public Member Functions

 Feat ()
 member initializer list constructor More...
 
 ~Feat ()
 destructor
More...
 
void init ()
 initialize Feat object for fitting. More...
 
void set_is_fitted (bool f)
 set flag indicating whether fit has been called More...
 
bool get_is_fitted ()
 
void set_pop_size (int pop_size)
 set size of population More...
 
int get_pop_size ()
 return population size More...
 
void set_gens (int gens)
 set size of max generations
More...
 
int get_gens ()
 return size of max generations More...
 
void set_ml (string ml)
 set ML algorithm to use
More...
 
string get_ml ()
 return ML algorithm string More...
 
void set_classification (bool classification)
 set EProblemType for shogun
More...
 
bool get_classification ()
 return whether the classification flag is set More...
 
void set_verbosity (int verbosity)
 set level of debug info
More...
 
int get_verbosity ()
 return current verbosity level set More...
 
void set_max_stall (int max_stall)
 set maximum stall in learning, in generations More...
 
int get_max_stall ()
 return maximum stall in learning, in generations More...
 
void set_selection (string sel)
 set selection method
More...
 
void set_survival (string surv)
 set survivability
More...
 
float get_cross_rate ()
 return cross rate for variation More...
 
void set_cross_rate (float cross_rate)
 set cross rate in variation
More...
 
void set_root_xo_rate (float cross_rate)
 set root xo rate in variation
More...
 
float get_root_xo_rate ()
 
vector< char > get_otypes ()
 return program output type ('f', 'b')
More...
 
char get_otype ()
 return parameter otype, used to set otypes More...
 
void set_otype (char ot)
 set program output type ('f', 'b')
More...
 
void set_functions (const vector< string > &fns)
 sets available functions based on comma-separated list. More...
 
vector< string > get_functions ()
 
int get_max_depth ()
 return max_depth of programs More...
 
void set_max_depth (unsigned int max_depth)
 set max depth of programs
More...
 
int get_max_dim ()
 return max dimensionality of programs More...
 
void set_max_dim (unsigned int max_dim)
 set maximum dimensionality of programs
More...
 
void set_random_state (int random_state)
 set the random state, which seeds each core's random number generator More...
 
int get_random_state ()
 
int get_random_state_ ()
 returns the actual seed determined by the input argument. More...
 
bool get_erc ()
 return boolean value of erc flag More...
 
void set_erc (bool erc)
 flag to set whether to use variable or constants for terminals More...
 
bool get_shuffle ()
 return whether option to shuffle the data is set or not More...
 
void set_shuffle (bool sh)
 flag to shuffle the input samples for train/test splits More...
 
float get_split ()
 return fraction of data to use for training More...
 
void set_split (float sp)
 set train fraction of dataset More...
 
vector< char > get_dtypes ()
 return data types for input parameters More...
 
void set_dtypes (vector< char > dtypes)
 set data types for input parameters More...
 
float get_fb ()
 get feedback setting More...
 
void set_fb (float fb)
 set feedback More...
 
string get_logfile ()
 get log file name More...
 
void set_logfile (string s)
 set name for files More...
 
string get_scorer ()
 
void set_scorer (string s)
 set scoring function More...
 
string get_scorer_ ()
 
void set_feature_names (string s)
 
string get_feature_names ()
 
void set_backprop (bool bp)
 set constant optimization options More...
 
bool get_backprop ()
 
void set_simplify (float s)
 
float get_simplify ()
 
void set_corr_delete_mutate (bool s)
 
bool get_corr_delete_mutate ()
 
void set_hillclimb (bool hc)
 
bool get_hillclimb ()
 
void set_iters (int iters)
 
int get_iters ()
 
void set_lr (float lr)
 
float get_lr ()
 
int get_batch_size ()
 
void set_batch_size (int bs)
 
void set_n_jobs (unsigned t)
 set number of threads More...
 
int get_n_jobs ()
 
void set_max_time (int time)
 set max time in seconds for fit method More...
 
int get_max_time ()
 
void set_use_batch ()
 set flag to use batch for training More...
 
void set_residual_xo (bool res_xo=true)
 use residual crossover More...
 
bool get_residual_xo ()
 
void set_stagewise_xo (bool sem_xo=true)
 use stagewise crossover More...
 
bool get_stagewise_xo ()
 
void set_stagewise_xo_tol (int tol)
 
int get_stagewise_xo_tol ()
 
void set_softmax_norm (bool sftmx=true)
 use softmax More...
 
bool get_softmax_norm ()
 
void set_save_pop (int pp)
 
int get_save_pop ()
 
void set_starting_pop (string sp)
 
string get_starting_pop ()
 
void set_normalize (bool in)
 
bool get_normalize ()
 
string get_sel ()
 
void set_sel (string in)
 
string get_surv ()
 
void set_surv (string in)
 
bool get_tune_initial ()
 
void set_tune_initial (bool in)
 
bool get_tune_final ()
 
void set_tune_final (bool in)
 
auto get_objectives ()
 get objectives for multi-objective search More...
 
void set_objectives (const vector< string > &obj)
 set objectives for multi-objective search More...
 
string get_protected_groups ()
 
void set_protected_groups (string pg)
 set protected groups for fairness More...
 
bool get_val_from_arch ()
 
void set_val_from_arch (bool in)
 
int get_archive_size ()
 return archive size More...
 
int get_max_size ()
 return max size of programs More...
 
int get_num_features ()
 return number of features More...
 
string get_representation ()
 return best model More...
 
string get_model (bool sort=true)
 return best model, in tabular form More...
 
string get_ind_eqn (bool sort, Individual &ind)
 return best model as a single line equation More...
 
string get_eqn (bool sort=false)
 
int get_n_params ()
 get number of parameters in best More...
 
int get_dim ()
 get dimensionality of best More...
 
int get_complexity ()
 get complexity of best More...
 
vector< nl::json > get_archive (bool front)
 return population as string More...
 
ArrayXf get_coefs ()
 return the coefficients or importance scores of the best model. More...
 
int get_n_nodes ()
 return the number of nodes in the best model More...
 
LongData get_Z (string s, int *idx, int idx_size)
 get longitudinal data from file s More...
 
void fit (MatrixXf &X, VectorXf &y)
 train a model.
More...
 
void fit (MatrixXf &X, VectorXf &y, LongData &Z)
 
void run_generation (unsigned int g, vector< size_t > survivors, DataRef &d, std::ofstream &log, float percentage, unsigned &stall_count)
 
VectorXf predict (MatrixXf &X, LongData &Z)
 predict on unseen data.
More...
 
VectorXf predict (MatrixXf &X)
 
VectorXf predict_archive (int id, MatrixXf &X)
 predict on unseen data from the whole archive
More...
 
VectorXf predict_archive (int id, MatrixXf &X, LongData &Z)
 
ArrayXXf predict_proba_archive (int id, MatrixXf &X, LongData &Z)
 
ArrayXXf predict_proba_archive (int id, MatrixXf &X)
 
shared_ptr< CLabels > predict_labels (MatrixXf &X, LongData Z=LongData())
 predict on unseen data. return CLabels. More...
 
ArrayXXf predict_proba (MatrixXf &X, LongData &Z)
 predict probabilities of each class. More...
 
ArrayXXf predict_proba (MatrixXf &X)
 
MatrixXf transform (MatrixXf &X)
 transform an input matrix using a program.
More...
 
MatrixXf transform (MatrixXf &X, LongData &Z)
 
MatrixXf transform (MatrixXf &X, LongData Z, Individual *ind)
 
float score (MatrixXf &X, const VectorXf &y, LongData Z=LongData())
 scoring function More...
 
nl::json get_stats ()
 return statistics from the run as a json string More...
 
void load_best_ind (string filename)
 load best_ind from file More...
 
void load_population (string filename, bool justfront=false)
 load population from file, optionally just Pareto front More...
 
void load (const json &j)
 load Feat state from a json string. More...
 
void load_from_file (string filename)
 load Feat state from file. More...
 
json save () const
 save and return a json Feat state as string. More...
 
void save_to_file (string filename)
 save Feat state to file. More...
 

Public Attributes

bool is_fitted
 keeps track of whether fit was called. More...
 

Private Member Functions

bool update_best (const DataRef &d, bool val=false)
 updates best score More...
 
void calculate_stats (const DataRef &d)
 calculate and print stats More...
 
void print_stats (std::ofstream &log, float fraction)
 
void log_stats (std::ofstream &log)
 
vector< float > univariate_initial_model (DataRef &d, int n_feats)
 
void initial_model (DataRef &d)
 method to fit initial ml model
More...
 
void final_model (DataRef &d)
 fits final model to best transformation More...
 
void simplify_model (DataRef &d, Individual &)
 simplifies final model to best transformation More...
 
void update_stall_count (unsigned &stall_count, bool updated)
 updates stall count for early stopping More...
 
 NLOHMANN_DEFINE_TYPE_INTRUSIVE (Feat, params, pop, selector, survivor, archive, use_arch, survival, N, min_loss, min_loss_v, best_med_score, best_complexity, str_dim, starting_pop, best_ind, is_fitted)
 

Private Attributes

Parameters params
 hyperparameters of Feat More...
 
Timer timer
 start time of training More...
 
Population pop
 population of programs More...
 
Selection selector
 selection algorithm More...
 
Evaluation evaluator
 evaluation code More...
 
Variation variator
 variation operators More...
 
Selection survivor
 survival algorithm More...
 
Archive archive
 pareto front archive More...
 
bool use_arch
 internal control over use of archive More...
 
string survival
 stores survival mode More...
 
Normalizer N
 scales training data. More...
 
float min_loss
 current best score More...
 
float min_loss_v
 best validation score More...
 
float best_med_score
 best median population score More...
 
int best_complexity
 complexity of the best model More...
 
string str_dim
 dimensionality as multiple of number of cols More...
 
string starting_pop
 file with starting population More...
 
Individual best_ind
 best individual More...
 
string logfile
 log filename More...
 
int save_pop
 controls whether pop is printed each gen More...
 
bool val_from_arch
 model selection only uses Pareto front More...
 
float simplify
 post-run simplification More...
 
Log_Stats stats
 runtime stats More...
 

Detailed Description

main class for the Feat learner.

Feat optimizes feature representations for a given machine learning algorithm. It does so by using evolutionary computation to optimize a population of programs. Each program represents a set of feature transformations.

Definition at line 72 of file feat.h.

Constructor & Destructor Documentation

◆ Feat()

FT::Feat::Feat ( )
inline

member initializer list constructor

Definition at line 101 of file feat.h.

◆ ~Feat()

FT::Feat::~Feat ( )
inline

destructor

Definition at line 103 of file feat.h.

Member Function Documentation

◆ calculate_stats()

void Feat::calculate_stats ( const DataRef d)
private

calculate and print stats

Definition at line 1338 of file feat.cc.

◆ final_model()

void Feat::final_model ( DataRef d)
private

fits final model to best transformation

Definition at line 770 of file feat.cc.

◆ fit() [1/2]

void Feat::fit ( MatrixXf &  X,
VectorXf &  y 
)

train a model.

Definition at line 668 of file feat.cc.

◆ fit() [2/2]

void Feat::fit ( MatrixXf &  X,
VectorXf &  y,
LongData Z 
)

Input:

 X: n_features x n_samples MatrixXf of features
 y: VectorXf of labels 

Output:

 updates best_estimator, hof

steps:

  1. fit model yhat = f(X)
  2. generate transformations Phi(X) for each individual
  3. fit model yhat_new = f( Phi(X)) for each individual
  4. evaluate features
  5. select parents
  6. produce offspring from parents via variation
  7. select surviving individuals from parents and offspring

< log file stream

Definition at line 46 of file feat.cc.

◆ get_archive()

vector< json > Feat::get_archive ( bool  front)

return population as string

Definition at line 577 of file feat.cc.

◆ get_archive_size()

int FT::Feat::get_archive_size ( )
inline

return archive size

Definition at line 312 of file feat.h.

◆ get_backprop()

bool FT::Feat::get_backprop ( )
inline

Definition at line 228 of file feat.h.

◆ get_batch_size()

int FT::Feat::get_batch_size ( )
inline

Definition at line 245 of file feat.h.

◆ get_classification()

bool Feat::get_classification ( )

return whether the classification flag is set

Definition at line 428 of file feat.cc.

◆ get_coefs()

ArrayXf Feat::get_coefs ( )

return the coefficients or importance scores of the best model.

Definition at line 649 of file feat.cc.

◆ get_complexity()

int Feat::get_complexity ( )

get complexity of best

Definition at line 570 of file feat.cc.

◆ get_corr_delete_mutate()

bool FT::Feat::get_corr_delete_mutate ( )
inline

Definition at line 234 of file feat.h.

◆ get_cross_rate()

float Feat::get_cross_rate ( )

return cross rate for variation

Definition at line 443 of file feat.cc.

◆ get_dim()

int Feat::get_dim ( )

get dimensionality of best

Definition at line 567 of file feat.cc.

◆ get_dtypes()

vector< char > Feat::get_dtypes ( )

return data types for input parameters

add custom node into feat

return data types for input parameters

Definition at line 470 of file feat.cc.

◆ get_eqn()

string Feat::get_eqn ( bool  sort = false)

Definition at line 478 of file feat.cc.

◆ get_erc()

bool Feat::get_erc ( )

return boolean value of erc flag

Definition at line 452 of file feat.cc.

◆ get_fb()

float Feat::get_fb ( )

get feedback setting

return feedback setting

Definition at line 473 of file feat.cc.

◆ get_feature_names()

string FT::Feat::get_feature_names ( )
inline

Definition at line 224 of file feat.h.

◆ get_functions()

vector<string> FT::Feat::get_functions ( )
inline

Definition at line 164 of file feat.h.

◆ get_gens()

int Feat::get_gens ( )

return size of max generations

Definition at line 422 of file feat.cc.

◆ get_hillclimb()

bool FT::Feat::get_hillclimb ( )
inline

Definition at line 237 of file feat.h.

◆ get_ind_eqn()

string Feat::get_ind_eqn ( bool  sort,
Individual ind 
)

return best model as a single line equation

Definition at line 480 of file feat.cc.

◆ get_is_fitted()

bool FT::Feat::get_is_fitted ( )
inline

Definition at line 108 of file feat.h.

◆ get_iters()

int FT::Feat::get_iters ( )
inline

Definition at line 240 of file feat.h.

◆ get_logfile()

string Feat::get_logfile ( )

get log file name

Definition at line 455 of file feat.cc.

◆ get_lr()

float FT::Feat::get_lr ( )
inline

Definition at line 243 of file feat.h.

◆ get_max_depth()

int Feat::get_max_depth ( )

return max_depth of programs

Definition at line 440 of file feat.cc.

◆ get_max_dim()

int Feat::get_max_dim ( )

return max dimensionality of programs

Definition at line 449 of file feat.cc.

◆ get_max_size()

int Feat::get_max_size ( )

return max size of programs

Definition at line 446 of file feat.cc.

◆ get_max_stall()

int Feat::get_max_stall ( )

return maximum stall in learning, in generations

Definition at line 431 of file feat.cc.

◆ get_max_time()

int FT::Feat::get_max_time ( )
inline

Definition at line 254 of file feat.h.

◆ get_ml()

string Feat::get_ml ( )

return ML algorithm string

Definition at line 425 of file feat.cc.

◆ get_model()

string Feat::get_model ( bool  sort = true)

return best model, in tabular form

Definition at line 527 of file feat.cc.

◆ get_n_jobs()

int FT::Feat::get_n_jobs ( )
inline

Definition at line 250 of file feat.h.

◆ get_n_nodes()

int Feat::get_n_nodes ( )

return the number of nodes in the best model

Definition at line 574 of file feat.cc.

◆ get_n_params()

int Feat::get_n_params ( )

get number of parameters in best

Definition at line 564 of file feat.cc.

◆ get_normalize()

bool FT::Feat::get_normalize ( )
inline

Definition at line 281 of file feat.h.

◆ get_num_features()

int Feat::get_num_features ( )

return number of features

Definition at line 458 of file feat.cc.

◆ get_objectives()

auto FT::Feat::get_objectives ( )
inline

get objectives for multi-objective search

Definition at line 297 of file feat.h.

◆ get_otype()

char FT::Feat::get_otype ( )
inline

return parameter otype, used to set otypes

Definition at line 158 of file feat.h.

◆ get_otypes()

vector< char > Feat::get_otypes ( )

return program output type ('f', 'b')

Definition at line 434 of file feat.cc.

◆ get_pop_size()

int Feat::get_pop_size ( )

return population size

Definition at line 419 of file feat.cc.

◆ get_protected_groups()

string FT::Feat::get_protected_groups ( )
inline

Definition at line 301 of file feat.h.

◆ get_random_state()

int FT::Feat::get_random_state ( )
inline

Definition at line 181 of file feat.h.

◆ get_random_state_()

int FT::Feat::get_random_state_ ( )
inline

returns the actual seed determined by the input argument.

Definition at line 183 of file feat.h.

◆ get_representation()

string Feat::get_representation ( )

return best model

Definition at line 476 of file feat.cc.

◆ get_residual_xo()

bool FT::Feat::get_residual_xo ( )
inline

Definition at line 261 of file feat.h.

◆ get_root_xo_rate()

float FT::Feat::get_root_xo_rate ( )
inline

Definition at line 153 of file feat.h.

◆ get_save_pop()

int FT::Feat::get_save_pop ( )
inline

Definition at line 275 of file feat.h.

◆ get_scorer()

string Feat::get_scorer ( )

Definition at line 382 of file feat.cc.

◆ get_scorer_()

string Feat::get_scorer_ ( )

Definition at line 381 of file feat.cc.

◆ get_sel()

string FT::Feat::get_sel ( )
inline

Definition at line 283 of file feat.h.

◆ get_shuffle()

bool Feat::get_shuffle ( )

return whether option to shuffle the data is set or not

Definition at line 461 of file feat.cc.

◆ get_simplify()

float FT::Feat::get_simplify ( )
inline

Definition at line 231 of file feat.h.

◆ get_softmax_norm()

bool FT::Feat::get_softmax_norm ( )
inline

Definition at line 272 of file feat.h.

◆ get_split()

float Feat::get_split ( )

return fraction of data to use for training

Definition at line 464 of file feat.cc.

◆ get_stagewise_xo()

bool FT::Feat::get_stagewise_xo ( )
inline

Definition at line 265 of file feat.h.

◆ get_stagewise_xo_tol()

int FT::Feat::get_stagewise_xo_tol ( )
inline

Definition at line 268 of file feat.h.

◆ get_starting_pop()

string FT::Feat::get_starting_pop ( )
inline

Definition at line 278 of file feat.h.

◆ get_stats()

json Feat::get_stats ( )

return statistics from the run as a json string

Definition at line 1566 of file feat.cc.

◆ get_surv()

string FT::Feat::get_surv ( )
inline

Definition at line 286 of file feat.h.

◆ get_tune_final()

bool FT::Feat::get_tune_final ( )
inline

Definition at line 292 of file feat.h.

◆ get_tune_initial()

bool FT::Feat::get_tune_initial ( )
inline

Definition at line 289 of file feat.h.

◆ get_val_from_arch()

bool FT::Feat::get_val_from_arch ( )
inline

Definition at line 305 of file feat.h.

◆ get_verbosity()

int Feat::get_verbosity ( )

return current verbosity level set

Definition at line 437 of file feat.cc.

◆ get_Z()

std::map< string, std::pair< vector< ArrayXf >, vector< ArrayXf > > > Feat::get_Z ( string  s,
int *  idx,
int  idx_size 
)

get longitudinal data from file s

Definition at line 657 of file feat.cc.

◆ init()

void Feat::init ( )

initialize Feat object for fitting.

Definition at line 24 of file feat.cc.

◆ initial_model()

void Feat::initial_model ( DataRef d)
private

method to fit initial ml model

fits an ML model to the raw data as a starting point.

Definition at line 1043 of file feat.cc.

◆ load()

void Feat::load ( const json &  j)

load Feat state from a json string.

Definition at line 1584 of file feat.cc.

◆ load_best_ind()

void Feat::load_best_ind ( string  filename)

load best_ind from file

Definition at line 1573 of file feat.cc.

◆ load_from_file()

void Feat::load_from_file ( string  filename)

load Feat state from file.

Definition at line 1597 of file feat.cc.

◆ load_population()

void Feat::load_population ( string  filename,
bool  justfront = false 
)

load population from file, optionally just Pareto front

Definition at line 1579 of file feat.cc.

◆ log_stats()

void Feat::log_stats ( std::ofstream &  log)
private

Definition at line 1536 of file feat.cc.

◆ NLOHMANN_DEFINE_TYPE_INTRUSIVE()

FT::Feat::NLOHMANN_DEFINE_TYPE_INTRUSIVE ( Feat  ,
params  ,
pop  ,
selector  ,
survivor  ,
archive  ,
use_arch  ,
survival  ,
N  ,
min_loss  ,
min_loss_v  ,
best_med_score  ,
best_complexity  ,
str_dim  ,
starting_pop  ,
best_ind  ,
is_fitted   
)
private

◆ predict() [1/2]

VectorXf Feat::predict ( MatrixXf &  X)

Definition at line 1178 of file feat.cc.

◆ predict() [2/2]

VectorXf Feat::predict ( MatrixXf &  X,
LongData Z 
)

predict on unseen data.

Definition at line 1184 of file feat.cc.

◆ predict_archive() [1/2]

VectorXf Feat::predict_archive ( int  id,
MatrixXf &  X 
)

predict on unseen data from the whole archive

Definition at line 1195 of file feat.cc.

◆ predict_archive() [2/2]

VectorXf Feat::predict_archive ( int  id,
MatrixXf &  X,
LongData Z 
)

Definition at line 1201 of file feat.cc.

◆ predict_labels()

shared_ptr< CLabels > Feat::predict_labels ( MatrixXf &  X,
LongData  Z = LongData() 
)

predict on unseen data. return CLabels.

Definition at line 1269 of file feat.cc.

◆ predict_proba() [1/2]

ArrayXXf Feat::predict_proba ( MatrixXf &  X)

Definition at line 1289 of file feat.cc.

◆ predict_proba() [2/2]

ArrayXXf Feat::predict_proba ( MatrixXf &  X,
LongData Z 
)

predict probabilities of each class.

Definition at line 1280 of file feat.cc.

◆ predict_proba_archive() [1/2]

ArrayXXf Feat::predict_proba_archive ( int  id,
MatrixXf &  X 
)

Definition at line 1242 of file feat.cc.

◆ predict_proba_archive() [2/2]

ArrayXXf Feat::predict_proba_archive ( int  id,
MatrixXf &  X,
LongData Z 
)

Definition at line 1247 of file feat.cc.

◆ print_stats()

void Feat::print_stats ( std::ofstream &  log,
float  fraction 
)
private

Definition at line 1431 of file feat.cc.

◆ run_generation()

void Feat::run_generation ( unsigned int  g,
vector< size_t >  survivors,
DataRef d,
std::ofstream &  log,
float  percentage,
unsigned &  stall_count 
)

Definition at line 675 of file feat.cc.

◆ save()

json Feat::save ( ) const

save and return a json Feat state as string.

Definition at line 1590 of file feat.cc.

◆ save_to_file()

void Feat::save_to_file ( string  filename)

save Feat state to file.

Definition at line 1614 of file feat.cc.

◆ score()

float Feat::score ( MatrixXf &  X,
const VectorXf &  y,
LongData  Z = LongData() 
)

scoring function

Definition at line 1331 of file feat.cc.

◆ set_backprop()

void Feat::set_backprop ( bool  bp)

set constant optimization options

Definition at line 385 of file feat.cc.

◆ set_batch_size()

void Feat::set_batch_size ( int  bs)

Definition at line 397 of file feat.cc.

◆ set_classification()

void Feat::set_classification ( bool  classification)

set EProblemType for shogun

Definition at line 304 of file feat.cc.

◆ set_corr_delete_mutate()

void Feat::set_corr_delete_mutate ( bool  s)

Definition at line 389 of file feat.cc.

◆ set_cross_rate()

void Feat::set_cross_rate ( float  cross_rate)

set cross rate in variation

Definition at line 326 of file feat.cc.

◆ set_dtypes()

void Feat::set_dtypes ( vector< char >  dtypes)

set data types for input parameters

Definition at line 371 of file feat.cc.

◆ set_erc()

void Feat::set_erc ( bool  erc)

flag to set whether to use variable or constants for terminals

flag to set whether to use variable or constants for terminals

Definition at line 362 of file feat.cc.

◆ set_fb()

void Feat::set_fb ( float  fb)

set feedback

Definition at line 374 of file feat.cc.

◆ set_feature_names()

void FT::Feat::set_feature_names ( string  s)
inline

Definition at line 223 of file feat.h.

◆ set_functions()

void FT::Feat::set_functions ( const vector< string > &  fns)
inline

sets available functions based on comma-separated list.

Definition at line 163 of file feat.h.

◆ set_gens()

void Feat::set_gens ( int  gens)

set size of max generations

Definition at line 298 of file feat.cc.

◆ set_hillclimb()

void Feat::set_hillclimb ( bool  hc)

Definition at line 391 of file feat.cc.

◆ set_is_fitted()

void FT::Feat::set_is_fitted ( bool  f)
inline

set flag indicating whether fit has been called

Definition at line 107 of file feat.h.

◆ set_iters()

void Feat::set_iters ( int  iters)

Definition at line 393 of file feat.cc.

◆ set_logfile()

void Feat::set_logfile ( string  s)

set name for files

Definition at line 377 of file feat.cc.

◆ set_lr()

void Feat::set_lr ( float  lr)

Definition at line 395 of file feat.cc.

◆ set_max_depth()

void Feat::set_max_depth ( unsigned int  max_depth)

set max depth of programs

Definition at line 343 of file feat.cc.

◆ set_max_dim()

void Feat::set_max_dim ( unsigned int  max_dim)

set maximum dimensionality of programs

Definition at line 349 of file feat.cc.

◆ set_max_stall()

void Feat::set_max_stall ( int  max_stall)

set maximum stall in learning, in generations

Definition at line 313 of file feat.cc.

◆ set_max_time()

void Feat::set_max_time ( int  time)

set max time in seconds for fit method

Definition at line 406 of file feat.cc.

◆ set_ml()

void Feat::set_ml ( string  ml)

set ML algorithm to use

Definition at line 301 of file feat.cc.

◆ set_n_jobs()

void Feat::set_n_jobs ( unsigned  t)

set number of threads

Definition at line 404 of file feat.cc.

◆ set_normalize()

void FT::Feat::set_normalize ( bool  in)
inline

Definition at line 280 of file feat.h.

◆ set_objectives()

void FT::Feat::set_objectives ( const vector< string > &  obj)
inline

set objectives for multi-objective search

Definition at line 299 of file feat.h.

◆ set_otype()

void Feat::set_otype ( char  ot)

set program output type ('f', 'b')

Definition at line 339 of file feat.cc.

◆ set_pop_size()

void Feat::set_pop_size ( int  pop_size)

set size of population

Definition at line 295 of file feat.cc.

◆ set_protected_groups()

void Feat::set_protected_groups ( string  pg)

set protected groups for fairness

Definition at line 410 of file feat.cc.

◆ set_random_state()

void Feat::set_random_state ( int  rs)

set the random state (seed)

set seeds for each core's random number generator

Definition at line 355 of file feat.cc.

◆ set_residual_xo()

void FT::Feat::set_residual_xo ( bool  res_xo = true)
inline

use residual crossover

Definition at line 260 of file feat.h.

◆ set_root_xo_rate()

void Feat::set_root_xo_rate ( float  cross_rate)

set root xo rate in variation

set root cross rate in variation

Definition at line 333 of file feat.cc.

◆ set_save_pop()

void FT::Feat::set_save_pop ( int  pp)
inline

Definition at line 274 of file feat.h.

◆ set_scorer()

void Feat::set_scorer ( string  s)

set scoring function

Definition at line 380 of file feat.cc.

◆ set_sel()

void FT::Feat::set_sel ( string  in)
inline

Definition at line 284 of file feat.h.

◆ set_selection()

void Feat::set_selection ( string  sel)

set selection method

Definition at line 316 of file feat.cc.

◆ set_shuffle()

void Feat::set_shuffle ( bool  sh)

flag to shuffle the input samples for train/test splits

Definition at line 365 of file feat.cc.

◆ set_simplify()

void Feat::set_simplify ( float  s)

Definition at line 387 of file feat.cc.

◆ set_softmax_norm()

void FT::Feat::set_softmax_norm ( bool  sftmx = true)
inline

use softmax

Definition at line 271 of file feat.h.

◆ set_split()

void Feat::set_split ( float  sp)

set train fraction of dataset

Definition at line 368 of file feat.cc.

◆ set_stagewise_xo()

void FT::Feat::set_stagewise_xo ( bool  sem_xo = true)
inline

use stagewise crossover

Definition at line 264 of file feat.h.

◆ set_stagewise_xo_tol()

void FT::Feat::set_stagewise_xo_tol ( int  tol)
inline

Definition at line 267 of file feat.h.

◆ set_starting_pop()

void FT::Feat::set_starting_pop ( string  sp)
inline

Definition at line 277 of file feat.h.

◆ set_surv()

void FT::Feat::set_surv ( string  in)
inline

Definition at line 287 of file feat.h.

◆ set_survival()

void Feat::set_survival ( string  surv)

set survivability

Definition at line 319 of file feat.cc.

◆ set_tune_final()

void FT::Feat::set_tune_final ( bool  in)
inline

Definition at line 293 of file feat.h.

◆ set_tune_initial()

void FT::Feat::set_tune_initial ( bool  in)
inline

Definition at line 290 of file feat.h.

◆ set_use_batch()

void Feat::set_use_batch ( )

set flag to use batch for training

Definition at line 408 of file feat.cc.

◆ set_val_from_arch()

void FT::Feat::set_val_from_arch ( bool  in)
inline

Definition at line 306 of file feat.h.

◆ set_verbosity()

void Feat::set_verbosity ( int  verbosity)

set level of debug info

Definition at line 310 of file feat.cc.

◆ simplify_model()

void Feat::simplify_model ( DataRef d,
Individual ind 
)
private

simplifies final model to best transformation

Definition at line 786 of file feat.cc.

◆ transform() [1/3]

MatrixXf Feat::transform ( MatrixXf &  X)

transform an input matrix using a program.

Definition at line 1142 of file feat.cc.

◆ transform() [2/3]

MatrixXf Feat::transform ( MatrixXf &  X,
LongData Z 
)

Definition at line 1147 of file feat.cc.

◆ transform() [3/3]

MatrixXf Feat::transform ( MatrixXf &  X,
LongData  Z,
Individual ind 
)

Transforms input data according to ind or best ind, if ind is undefined.

Definition at line 1151 of file feat.cc.

◆ univariate_initial_model()

vector< float > Feat::univariate_initial_model ( DataRef d,
int  n_feats 
)
private

If there are more data variables than the maximum feature size allows, we can't initialize a model in the population without some form of feature selection. To select features, we do the following: 1) fit univariate models to all features in X and store the coefficients; 2) fit univariate models to all features in median(Z) and store the coefficients; 3) set terminal weights according to the univariate scores; 4) construct a program of dimensionality n_feats using the largest-magnitude coefficients.

Definition at line 979 of file feat.cc.

◆ update_best()

bool Feat::update_best ( const DataRef d,
bool  val = false 
)
private

updates best score

Definition at line 1296 of file feat.cc.

◆ update_stall_count()

void Feat::update_stall_count ( unsigned &  stall_count,
bool  updated 
)
private

updates stall count for early stopping

Definition at line 754 of file feat.cc.

Member Data Documentation

◆ archive

Archive FT::Feat::archive
private

pareto front archive

Definition at line 410 of file feat.h.

◆ best_complexity

int FT::Feat::best_complexity
private

complexity of the best model

Definition at line 418 of file feat.h.

◆ best_ind

Individual FT::Feat::best_ind
private

best individual

Definition at line 421 of file feat.h.

◆ best_med_score

float FT::Feat::best_med_score
private

best median population score

Definition at line 417 of file feat.h.

◆ evaluator

Evaluation FT::Feat::evaluator
private

evaluation code

Definition at line 407 of file feat.h.

◆ is_fitted

bool FT::Feat::is_fitted

keeps track of whether fit was called.

Definition at line 398 of file feat.h.

◆ logfile

string FT::Feat::logfile
private

log filename

Definition at line 422 of file feat.h.

◆ min_loss

float FT::Feat::min_loss
private

current best score

Definition at line 415 of file feat.h.

◆ min_loss_v

float FT::Feat::min_loss_v
private

best validation score

Definition at line 416 of file feat.h.

◆ N

Normalizer FT::Feat::N
private

scales training data.

Definition at line 413 of file feat.h.

◆ params

Parameters FT::Feat::params
private

hyperparameters of Feat

Definition at line 401 of file feat.h.

◆ pop

Population FT::Feat::pop
private

population of programs

Definition at line 405 of file feat.h.

◆ save_pop

int FT::Feat::save_pop
private

controls whether pop is printed each gen

Definition at line 423 of file feat.h.

◆ selector

Selection FT::Feat::selector
private

selection algorithm

Definition at line 406 of file feat.h.

◆ simplify

float FT::Feat::simplify
private

post-run simplification

Definition at line 425 of file feat.h.

◆ starting_pop

string FT::Feat::starting_pop
private

file with starting population

Definition at line 420 of file feat.h.

◆ stats

Log_Stats FT::Feat::stats
private

runtime stats

Definition at line 426 of file feat.h.

◆ str_dim

string FT::Feat::str_dim
private

dimensionality as multiple of number of cols

Definition at line 419 of file feat.h.

◆ survival

string FT::Feat::survival
private

stores survival mode

Definition at line 412 of file feat.h.

◆ survivor

Selection FT::Feat::survivor
private

survival algorithm

Definition at line 409 of file feat.h.

◆ timer

Timer FT::Feat::timer
private

start time of training

Definition at line 403 of file feat.h.

◆ use_arch

bool FT::Feat::use_arch
private

internal control over use of archive

Definition at line 411 of file feat.h.

◆ val_from_arch

bool FT::Feat::val_from_arch
private

model selection only uses Pareto front

Definition at line 424 of file feat.h.

◆ variator

Variation FT::Feat::variator
private

variation operators

Definition at line 408 of file feat.h.


The documentation for this class was generated from the following files: