Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
engine.h
Go to the documentation of this file.
1/* Brush
2copyright 2020 William La Cava
3license: GNU/GPL v3
4*/
5
6#ifndef Engine_H
7#define Engine_H
8
9#include "./util/rnd.h"
10#include "init.h"
11#include "params.h"
12#include "pop/population.h"
13#include "pop/archive.h"
14#include "./eval/evaluation.h"
15#include "vary/variation.h"
16#include "selection/selection.h"
17#include "taskflow/taskflow.hpp"
18
19#include <taskflow/algorithm/for_each.hpp>
20
21namespace Brush
22{
23
24using namespace Pop;
25using namespace Sel;
26using namespace Eval;
27using namespace Var;
28using namespace nlohmann;
29
30template <ProgramType T>
43class Engine{
44public:
46 : params(p)
47 , ss(SearchSpace()) // we need to initialize ss and variator. TODO: give them a default way to be constructed so we don't have to initialize them here
49 {};
50
52
53 // outputs a progress bar, filled according to @param percentage.
54 void print_progress(float percentage);
55 void calculate_stats();
56 void print_stats(std::ofstream& log, float fraction);
57 void log_stats(std::ofstream& log);
58
59 // All hyperparameters are controlled by the Parameters class; refer to it to change any setting.
60 inline Parameters& get_params(){return params;}
61 inline void set_params(Parameters& p){params=p;}
62
63 inline bool get_is_fitted(){return is_fitted;}
64
66 bool update_best(const Dataset& data, bool val=false);
67
68 // TODO: hyperparameter to set how the best is picked (MCDM, best on val, pareto front, etc). one of the options should be getting the pareto front
69
70 // TODO: best fitness (the class) instead of these. use fitness comparison
74
76 run(data);
77 return *this;
78 };
80 {
81 // Using constructor 2 to create the dataset
84 return fit(d);
85 };
86
87 auto predict(const Dataset& data) { return this->best_ind.predict(data); };
89 {
90 Dataset d(X);
91 return predict(d);
92 };
93
94 template <ProgramType P = T>
96 auto predict_proba(const Dataset &d) { return this->best_ind.predict_proba(d); };
97 template <ProgramType P = T>
100 {
101 Dataset d(X);
102 return predict_proba(d);
103 };
104
106 int get_archive_size(){ return this->archive.individuals.size(); };
107
109 vector<json> get_archive(bool front);
110
112 auto predict_archive(int id, const Dataset& data);
113 auto predict_archive(int id, const Ref<const ArrayXXf>& X);
114
115 template <ProgramType P = T>
116 requires((P == PT::BinaryClassifier) || (P == PT::MulticlassClassifier))
117 auto predict_proba_archive(int id, const Dataset& data);
118 template <ProgramType P = T>
119 requires((P == PT::BinaryClassifier) || (P == PT::MulticlassClassifier))
120 auto predict_proba_archive(int id, const Ref<const ArrayXXf>& X);
121
122 // TODO: predict/predict_proba/archive with longitudinal data
123
125 void run(Dataset &d);
126
129
131private:
133
139
141
143
145
146 void init();
147
149 inline void set_is_fitted(bool f){is_fitted=f;}
150};
151
152// Only stuff to make new predictions or call fit again
157
158} // Brush
159#endif
void bind_engine(py::module &m, string name)
holds variable type data.
Definition data.h:51
The Engine class represents the core engine of the brush library.
Definition engine.h:43
float best_score
Definition engine.h:71
Selection< T > selector
selection algorithm
Definition engine.h:135
auto predict(const Dataset &data)
Definition engine.h:87
Timer timer
start time of training
Definition engine.h:142
bool is_fitted
keeps track of whether fit was called.
Definition engine.h:144
Engine(const Parameters &p=Parameters())
Definition engine.h:45
void calculate_stats()
Definition engine.cpp:62
auto predict_proba(const Dataset &d)
Definition engine.h:96
Individual< T > best_ind
Definition engine.h:128
void print_progress(float percentage)
Definition engine.cpp:46
int best_complexity
Definition engine.h:72
void run(Dataset &d)
train the model
Definition engine.cpp:324
Engine< T > & fit(Dataset &data)
Definition engine.h:75
Log_Stats stats
runtime stats
Definition engine.h:140
Archive< T > archive
pareto front archive
Definition engine.h:130
Population< T > pop
population of programs
Definition engine.h:134
auto predict(const Ref< const ArrayXXf > &X)
Definition engine.h:88
Selection< T > survivor
survival algorithm
Definition engine.h:138
Variation< T > variator
variation operators
Definition engine.h:137
Parameters & get_params()
Definition engine.h:60
Individual< T > & get_best_ind()
Definition engine.h:73
void set_params(Parameters &p)
Definition engine.h:61
auto predict_proba(const Ref< const ArrayXXf > &X)
Definition engine.h:99
void set_is_fitted(bool f)
set flag indicating whether fit has been called
Definition engine.h:149
int get_archive_size()
return archive size
Definition engine.h:106
bool update_best(const Dataset &data, bool val=false)
updates best score by searching in the population for the individual that best fits the given data
Definition engine.cpp:288
Evaluation< T > evaluator
evaluation code
Definition engine.h:136
bool get_is_fitted()
Definition engine.h:63
Engine< T > & fit(const Ref< const ArrayXXf > &X, const Ref< const ArrayXf > &y)
Definition engine.h:79
Parameters params
hyperparameters of Brush, which the user can interact with
Definition engine.h:127
void init()
initialize the Engine object for fitting.
Definition engine.cpp:18
auto predict_proba_archive(int id, const Dataset &data)
Definition engine.cpp:248
auto predict_archive(int id, const Dataset &data)
predict on unseen data from the archive
Definition engine.cpp:208
void log_stats(std::ofstream &log)
Definition engine.cpp:129
vector< json > get_archive(bool front)
return the archive as a vector of JSON objects
Definition engine.cpp:191
void print_stats(std::ofstream &log, float fraction)
Definition engine.cpp:159
SearchSpace ss
Definition engine.h:132
Class for evaluating the fitness of individuals in a population.
Definition evaluation.h:26
class for timing things.
Definition utils.h:270
Class representing the variation operators in Brush.
Definition variation.h:53
NSGA2 selection operator for getting the front
Definition data.cpp:12
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Engine< PT::Regressor >, params, best_ind, archive)
Namespace containing scoring functions for evaluation metrics.
float batch_size
Definition params.h:78
float validation_size
Definition params.h:76
bool classification
Definition params.h:71
vector< string > feature_names
Definition params.h:77
The Archive struct represents a collection of individual programs.
Definition archive.h:26
Holds a search space, consisting of operations and terminals and functions, and methods to sample tha...
interfaces with selection operators.
Definition selection.h:25