45using TreeIter = tree<Node>::pre_order_iterator;
54extern std::unordered_map<std::size_t, std::string>
ArgsName;
148 template<
typename PT>
181 SearchSpace(
const Dataset& d,
const unordered_map<string,float>& user_ops = {},
bool weights_init =
true){
182 init(d,user_ops,weights_init);
189 void init(
const Dataset& d,
const unordered_map<string,float>& user_ops = {},
bool weights_init =
true);
196 auto msg = fmt::format(
"{} not in node_map\n",R);
210 auto msg = fmt::format(
"{} not in node_map.at({})\n", sig_hash, R);
224 if (
check(R,sig_hash)){
225 if (
node_map.at(R).at(sig_hash).find(type) ==
node_map.at(R).at(sig_hash).end()){
227 auto msg = fmt::format(
"{} not in node_map[{}][{}]\n",type, sig_hash, R);
240 template<
typename Iter>
242 return !std::all_of(start, end, [](
const auto& w) {
return w<=0.0; });
245 template<
typename F>
Node get(
const string& name);
254 check(R, sig_hash, type);
255 return node_map.at(R).at(sig_hash).at(type);
275 for (
const auto& [arg, name_map] : arg_w_map)
277 for (
const auto& [name, w]: name_map)
295 for (
const auto& [name, w]: name_map)
329 std::fill(data_type_weights.begin(), data_type_weights.end(), 1.0f);
336 data_type_weights.begin(),
338 return std::reduce(tw.second.begin(), tw.second.end()); }
342 data_type_weights.end()))
348 auto match = *
r.select_randomly(
351 data_type_weights.begin(),
352 data_type_weights.end()
356 vector<float> match_weights(match.second.size());
359 std::fill(match_weights.begin(), match_weights.end(), 1.0f);
366 match_weights.begin(),
367 [](
const auto& w){ return w; });
370 match_weights.end()))
374 return *
r.select_randomly(match.second.begin(), match.second.end(),
375 match_weights.begin(), match_weights.end());
394 std::fill(match_weights.begin(), match_weights.end(), 1.0f);
401 match_weights.begin(),
402 [](
const auto& w){ return w; }
407 match_weights.end())) )
413 match_weights.begin(),
414 match_weights.end());
434 auto arg_match = *
r.select_randomly(ret_match.begin(),
439 vector<float> name_w =
get_weights(ret, arg_match.first);
444 return (*
r.select_randomly(arg_match.second.begin(),
445 arg_match.second.end(),
447 name_w.end())).second;
462 vector<Node> matches;
463 vector<float> weights;
464 for (
const auto& kv: ret_match)
466 auto arg_hash = kv.first;
467 auto node_type_map = kv.second;
468 if (node_type_map.find(type) != node_type_map.end())
470 matches.push_back(node_type_map.at(type));
475 if ( (weights.size()==0)
480 return (*
r.select_randomly(matches.begin(),
493 bool terminal_compatible=
true,
494 int max_args=0)
const
504 vector<Node> matches;
505 vector<float> weights;
507 for (
const auto& [args_type, name_map]: args_map) {
508 for (
const auto& [name, node]: name_map) {
509 auto node_arg_types = node.get_arg_types();
513 auto within_size_limit = !(max_args) || (node.get_arg_count() <= max_args);
515 if (
in(node_arg_types, arg) && within_size_limit) {
518 if (terminal_compatible) {
519 bool compatible =
true;
520 for (
const auto& arg_type: node_arg_types) {
521 if (arg_type != arg) {
532 matches.push_back(node);
538 if ( (weights.size()==0)
543 return (*
r.select_randomly(matches.begin(), matches.end(),
544 weights.begin(), weights.end()));
559 if ( (match_weights.size()==0)
561 match_weights.end())) )
564 return (*
r.select_randomly(matches.begin(),
566 match_weights.begin(),
582 tree<Node>&
PTC2(tree<Node>& Tree, tree<Node>::iterator root,
int max_d,
int max_size)
const;
584 template<NodeType NT,
typename S>
587 const auto& unique_data_types,
594 for (
auto arg: S::get_arg_types()){
595 if (!
in(unique_data_types,arg) ){
599 ArgsName[S::hash()] = fmt::format(
"{}", S::get_arg_types());
600 return Node(
NT, S{}, weighted);
603 template<NodeType NT,
typename S>
605 const unordered_map<string,float>& user_ops,
606 const vector<DataType>& unique_data_types
609 bool use_all = user_ops.size() == 0;
612 bool weighted =
false;
619 auto n = n_maybe.value();
620 node_map[n.ret_type][n.args_type()][n.node_type] = n;
622 float w = use_all? 1.0 : user_ops.at(name);
627 template <
NodeType NT,
typename Sigs, std::size_t...
Is>
628 constexpr void AddNodes(
const unordered_map<string, float> &user_ops,
629 const vector<DataType> &unique_data_types,
630 std::index_sequence<Is...>)
635 template<NodeType NT>
636 void MakeNodes(
const unordered_map<string,float>& user_ops,
637 const vector<DataType>& unique_data_types
642 bool use_all = user_ops.size() == 0;
646 if (!use_all & user_ops.find(name) == user_ops.end())
650 constexpr auto size = std::tuple_size<signatures>::value;
654 std::make_index_sequence<size>()
658 template<std::size_t...
Is>
660 const vector<DataType>& unique_data_types,
661 std::index_sequence<Is...>
664 auto nt = [](
auto i) {
return static_cast<NodeType>(1UL << i); };
673 int loc =
r.rnd_int(0, Q.size()-1);
674 std::swap(Q[loc], Q[Q.size()-1]);
688 max_size =
r.rnd_int(1, params.
max_size);
695 auto Tree = tree<Node>();
699 tree<Node>::iterator spot;
706 node_logit.
fixed=
true;
707 auto spot_logit = Tree.insert(Tree.begin(), node_logit);
712 node_offset.
fixed=
true;
714 auto spot_offset = Tree.append_child(spot_logit);
716 spot = Tree.replace(spot_offset, node_offset);
726 node_softmax.
fixed=
true;
728 spot = Tree.insert(Tree.begin(), node_softmax);
734 std::optional<Node> opt=std::nullopt;
736 if (max_size>1 && max_d>1)
744 spot = Tree.insert(Tree.begin(), root);
748 PTC2(Tree, spot, max_d-1, max_size);
750 return P(*
this, Tree);
759 template <
typename FormatContext>
761 string output =
"Search Space\n===\n";
762 output += fmt::format(
"terminal_map: {}\n",
SS.terminal_map);
763 output += fmt::format(
"terminal_weights: {}\n",
SS.terminal_weights);
764 for (
const auto& [ret_type, v] :
SS.node_map) {
765 for (
const auto& [args_type, v2] : v) {
766 for (
const auto& [node_type, node] : v2) {
767 output += fmt::format(
"node_map[{}][{}][{}] = {}, weight = {}\n",
772 SS.node_map_weights.at(ret_type).at(args_type).at(node_type)
778 return formatter<string_view>::format(output, ctx);
holds variable type data.
#define HANDLE_ERROR_THROW(err)
namespace containing Data structures used in Brush
namespace containing various utility functions
bool in(const V &v, const T &i)
check if element is in vector.
< nsga2 selection operator for getting the front
Program< PT::Representer > RepresenterProgram
Program< PT::BinaryClassifier > ClassifierProgram
auto Is(NodeType nt) -> bool
std::unordered_map< std::size_t, std::string > ArgsName
vector< Node > generate_terminals(const Dataset &d, const bool weights_init)
generate terminals from the dataset features and random constants.
T RandomDequeue(std::vector< T > &Q)
queue for make program
tree< Node >::pre_order_iterator Iter
tree< Node >::pre_order_iterator TreeIter
std::map< NodeType, std::string > NodeTypeName
Program< PT::Regressor > RegressorProgram
Program< PT::MulticlassClassifier > MulticlassClassifierProgram
static constexpr bool is_in_v
class holding the data for a node in a tree.
bool fixed
whether node is modifiable
NodeType node_type
the node type
DataType ret_type
return data type
std::size_t args_type() const
void set_prob_change(float w)
Holds a search space, consisting of operations and terminals and functions, and methods to sample tha...
unordered_map< DataType, unordered_map< ArgsHash, unordered_map< NodeType, T > > > Map
void print() const
prints the search space map.
static constexpr std::optional< Node > CreateNode(const auto &unique_data_types, bool use_all, bool weighted)
Node get(const string &name)
constexpr void AddNode(const unordered_map< string, float > &user_ops, const vector< DataType > &unique_data_types)
Map< Node > node_map
Maps return types to argument types to node types.
unordered_map< DataType, vector< Node > > terminal_map
Maps return types to terminals.
constexpr void AddNodes(const unordered_map< string, float > &user_ops, const vector< DataType > &unique_data_types, std::index_sequence< Is... >)
std::optional< Node > sample_terminal(DataType R, bool force_return=false) const
Get a random terminal with return type R
void init(const Dataset &d, const unordered_map< string, float > &user_ops={}, bool weights_init=true)
Called by the constructor to initialize the search space.
RegressorProgram make_regressor(int max_d=0, int max_size=0, const Parameters ¶ms=Parameters())
Makes a random regressor program. Convenience wrapper for make_program.
bool check(DataType R, size_t sig_hash, NodeType type) const
check if a typed Node is in the search space
Node get(NodeType type, DataType R, S sig)
get a typed node.
bool check(DataType R) const
check if a return type is in the node map
unordered_map< DataType, vector< float > > terminal_weights
A map of weights corresponding to elements in terminal_map, used to weight probabilities of each term...
std::optional< Node > sample_op(NodeType type, DataType R)
Get a specific node type that matches a return value.
vector< float > get_weights() const
get weights of the return types
Node get(NodeType type, DataType R, size_t sig_hash)
get a typed node
vector< float > get_weights(DataType ret) const
get weights of the argument types matching return type ret.
bool check(DataType R, size_t sig_hash) const
check if a function signature is in the search space
void GenerateNodeMap(const unordered_map< string, float > &user_ops, const vector< DataType > &unique_data_types, std::index_sequence< Is... >)
tree< Node > & PTC2(tree< Node > &Tree, tree< Node >::iterator root, int max_d, int max_size) const
std::optional< Node > sample_op(DataType ret) const
get an operator matching return type ret.
SearchSpace(const Dataset &d, const unordered_map< string, float > &user_ops={}, bool weights_init=true)
Construct a search space.
std::optional< Node > get_node_like(Node node) const
get a node with a signature matching node
vector< float > get_weights(DataType ret, ArgsHash sig_hash) const
get the weights of nodes matching a signature.
RepresenterProgram make_representer(int max_d=0, int max_size=0, const Parameters ¶ms=Parameters())
Makes a random representer program. Convenience wrapper for make_program.
std::optional< Node > sample_op_with_arg(DataType ret, DataType arg, bool terminal_compatible=true, int max_args=0) const
get operator with at least one argument matching arg
Map< float > node_map_weights
A map of weights corresponding to elements in node_map, used to weight probabilities of each node bei...
MulticlassClassifierProgram make_multiclass_classifier(int max_d=0, int max_size=0, const Parameters ¶ms=Parameters())
Makes a random multiclass classifier program. Convenience wrapper for make_program.
std::optional< tree< Node > > sample_subtree(Node root, int max_d, int max_size) const
create a subtree with maximum size and depth restrictions and root of type root_type
PT make_program(const Parameters ¶ms, int max_d=0, int max_size=0)
Makes a random program.
vector< DataType > terminal_types
A vector storing the available return types of terminals.
bool has_solution_space(Iter start, Iter end) const
Takes iterators to weight vectors and checks if they have a non-empty solution space....
void MakeNodes(const unordered_map< string, float > &user_ops, const vector< DataType > &unique_data_types)
ClassifierProgram make_classifier(int max_d=0, int max_size=0, const Parameters ¶ms=Parameters())
Makes a random classifier program. Convenience wrapper for make_program.
std::optional< Node > sample_terminal(bool force_return=false) const
Get a random terminal.