17 vector<char> dtypes = {
'f',
'f'};
19 MatrixXf data(value.size(), 2);
31 float prob_change = std::abs(
slope(data.col(0).array() ,
32 data.col(1).array() ));
35 if (std::abs(prob_change)<1e-4)
40 if (std::isnan(prob_change))
53 vector<Node> terminals;
55 for (
const auto &[feature_name, value]: d.
features )
59 using T = std::decay_t<
decltype(arg)>;
60 using Scalar =
typename T::Scalar;
61 constexpr bool weighted = std::is_same_v<Scalar, float>;
70 float prob_change = 1.0;
72 if (d.
y.size()>0 && weights_init)
75 if (std::holds_alternative<ArrayXf>(value) && d.
y.size()>0)
79 else if (std::holds_alternative<ArrayXi>(value))
84 auto tmp = std::get<ArrayXi>(value);
87 std::map<float, bool> uniqueMap;
88 for(
int i = 0; i < tmp.size(); i++)
89 uniqueMap[(
float)tmp(i)] =
true;
91 ArrayXf slopes = ArrayXf::Ones(uniqueMap.size());
92 int slopesIterator = 0;
93 for (
const auto& pair : uniqueMap)
95 auto one_vs_all = ArrayXf::Ones(tmp.size()).array() * (tmp.array()==pair.first).cast<
float>();
100 prob_change = slopes.mean();
102 else if (std::holds_alternative<ArrayXb>(value))
104 auto tmp = std::get<ArrayXb>(value).template cast<float>();
109 auto msg = fmt::format(
"Brush coudn't calculate the initial weight of variable {}\n",feature_name);
116 terminals.push_back(n);
124 auto signature_avg = [terminals](
DataType ret_type){
128 for (
const auto& n : terminals) {
129 if (n.ret_type == ret_type) {
130 sum += n.get_prob_change();
140 float floats_avg_weights = signature_avg(cXf.ret_type);
141 cXf.set_prob_change(floats_avg_weights);
142 terminals.push_back(cXf);
146 terminals.push_back(cXi);
150 terminals.push_back(cXb);
155 terminals.push_back(meanlabel);
160std::unordered_map<std::size_t, std::string>
ArgsName;
163 std::cout << fmt::format(
"{}\n", *
this) << std::flush;
176 bool use_all = user_ops.size() == 0;
177 vector<string> op_names;
178 for (
const auto& [op, weight] : user_ops)
179 op_names.push_back(op);
196 std::unordered_map<std::string, float> extended_user_ops(user_ops);
201 std::vector<float> vec(d.
y.data(), d.
y.data() + d.
y.size());
203 std::set<float> unique_classes(vec.begin(), vec.end());
206 if (user_ops.find(
"OffsetSum") == user_ops.end())
207 extended_user_ops.insert({
"OffsetSum", 0.0f});
209 if (unique_classes.size()==2 && (user_ops.find(
"Logistic") == user_ops.end())) {
210 extended_user_ops.insert({
"Logistic", 0.0f});
212 else if (user_ops.find(
"Softmax") == user_ops.end()) {
213 extended_user_ops.insert({
"Softmax", 0.0f});
219 std::make_index_sequence<NodeTypes::OpCount>());
223 for (
const auto& term : terminals)
257 auto Tree = tree<Node>();
258 auto spot = Tree.insert(Tree.begin(), root);
263 PTC2(Tree, spot, max_d, max_size);
269 tree<Node>::iterator spot,
int max_d,
int max_size)
const
280 vector<tuple<TreeIter, DataType, int>> queue;
287 Node root = spot.node->data;
303 auto child_spot = Tree.append_child(spot);
304 queue.push_back(make_tuple(child_spot, a, d));
311 while ( queue.size() + s < max_size && queue.size() > 0)
326 if (d >= max_d || s >= max_size)
338 Tree.replace(qspot, n);
350 auto msg = fmt::format(
"Failed to sample operator AND terminal of data type {} during PTC2.\n",
DataTypeName[t]);
359 auto newspot = Tree.replace(qspot, n);
364 auto child_spot = Tree.append_child(newspot);
366 queue.push_back(make_tuple(child_spot, a, d+1));
383 while (queue.size() > 0)
385 if (queue.size() == 0)
396 auto newspot = Tree.replace(qspot, n);
413 int max_d,
int max_size,
const Parameters& params)
holds variable type data.
bool classification
whether this is a classification problem
std::map< string, State > features
dataset features, as key value pairs
std::vector< DataType > unique_data_types
keeps track of the unique data types in the dataset.
ArrayXf y
length N array, the target label
#define HANDLE_ERROR_THROW(err)
float slope(const ArrayXf &x, const ArrayXf &y)
slope of x/y
< nsga2 selection operator for getting the front
Program< PT::Representer > RepresenterProgram
auto Isnt(DataType dt) -> bool
Eigen::Array< bool, Eigen::Dynamic, 1 > ArrayXb
Program< PT::BinaryClassifier > ClassifierProgram
auto Is(NodeType nt) -> bool
std::unordered_map< std::size_t, std::string > ArgsName
vector< Node > generate_terminals(const Dataset &d, const bool weights_init)
generate terminals from the dataset features and random constants.
T RandomDequeue(std::vector< T > &Q)
queue for make program
map< DataType, string > DataTypeName
Eigen::Array< int, Eigen::Dynamic, 1 > ArrayXi
Program< PT::Regressor > RegressorProgram
float calc_initial_weight(const ArrayXf &value, const ArrayXf &y)
Program< PT::MulticlassClassifier > MulticlassClassifierProgram
class holding the data for a node in a tree.
std::vector< DataType > arg_types
argument data types
NodeType node_type
the node type
DataType ret_type
return data type
bool get_is_weighted() const
void set_prob_change(float w)
void print() const
prints the search space map.
Map< Node > node_map
Maps return types to argument types to node types.
unordered_map< DataType, vector< Node > > terminal_map
Maps return types to terminals.
void init(const Dataset &d, const unordered_map< string, float > &user_ops={}, bool weights_init=true)
Called by the constructor to initialize the search space.
RegressorProgram make_regressor(int max_d=0, int max_size=0, const Parameters &params=Parameters())
Makes a random regressor program. Convenience wrapper for make_program.
unordered_map< DataType, vector< float > > terminal_weights
A map of weights corresponding to elements in terminal_map, used to weight probabilities of each term...
vector< float > get_weights() const
get weights of the return types
void GenerateNodeMap(const unordered_map< string, float > &user_ops, const vector< DataType > &unique_data_types, std::index_sequence< Is... >)
tree< Node > & PTC2(tree< Node > &Tree, tree< Node >::iterator root, int max_d, int max_size) const
std::optional< Node > sample_op(DataType ret) const
get an operator matching return type ret.
RepresenterProgram make_representer(int max_d=0, int max_size=0, const Parameters &params=Parameters())
Makes a random representer program. Convenience wrapper for make_program.
Map< float > node_map_weights
A map of weights corresponding to elements in node_map, used to weight probabilities of each node bei...
MulticlassClassifierProgram make_multiclass_classifier(int max_d=0, int max_size=0, const Parameters &params=Parameters())
Makes a random multiclass classifier program. Convenience wrapper for make_program.
std::optional< tree< Node > > sample_subtree(Node root, int max_d, int max_size) const
create a subtree with maximum size and depth restrictions and root of type root_type
PT make_program(const Parameters &params, int max_d=0, int max_size=0)
Makes a random program.
vector< DataType > terminal_types
A vector storing the available return types of terminals.
bool has_solution_space(Iter start, Iter end) const
Takes iterators to weight vectors and checks if they have a non-empty solution space....
ClassifierProgram make_classifier(int max_d=0, int max_size=0, const Parameters &params=Parameters())
Makes a random classifier program. Convenience wrapper for make_program.
std::optional< Node > sample_terminal(bool force_return=false) const
Get a random terminal.
normalizes a matrix to unit variance, 0 mean centered.
void fit_normalize(MatrixXf &X, const vector< char > &dtypes)