21 for (
int i = 0; i < numPlanes; ++i)
22 storage.push_back(map<
size_t, vector<tree<Node>>>());
26 void append(
const int& storage_n,
const size_t& key,
const tree<Brush::Node> Tree) {
30 storage[storage_n][key] = vector<tree<Node>>();
35 auto& storage_it =
storage[storage_n][key];
38 size_t new_size = Tree.begin().node->get_size();
40 auto it = storage_it.begin();
41 for (; it != storage_it.end(); ++it) {
42 size_t curr_size = it->begin().node->get_size();
44 if (curr_size > new_size) {
47 }
else if (curr_size == new_size) {
49 auto it1 = it->begin();
50 auto it2 = Tree.begin();
52 auto end1 = it->end();
53 auto end2 = Tree.end();
55 bool trees_equal =
true;
56 for (; it1 != end1 && it2 != end2; ++it1, ++it2) {
57 if (it1.node->data.get_node_hash(
false)
58 != it2.node->data.get_node_hash(
false) ){
64 if (trees_equal && it1 == end1 && it2 == end2) {
76 storage_it.insert(it, Tree);
79 vector<tree<Node>>
getList(
const int& storage_n,
const size_t& key) {
80 auto it =
storage[storage_n].find(key);
81 if (it !=
storage[storage_n].end())
93 vector<size_t>
keys(
const int& storage_n) {
94 vector<size_t> result;
95 for (
const auto& pair :
storage[storage_n])
96 result.push_back(pair.first);
101 void print(
const string& prefix, std::ofstream& log)
const {
102 for (
size_t plane_idx = 0; plane_idx <
storage.size(); ++plane_idx) {
103 for (
const auto& kv :
storage[plane_idx]) {
104 size_t key = kv.first;
105 const auto& trees = kv.second;
107 for (
const auto& t : trees) {
111 << t.begin().node->get_model()
120 vector<map<size_t, vector<tree<Node>>>>
storage;
127 void init(
int hashSize,
const Dataset &data,
int numPlanes);
132 template<ProgramType P>
138 while(spot != program.
Tree.end_post())
143 if (spot.node->data.get_prob_change() > 0
149 if (program.
size_at(spot,
true) <= 30
160 template<ProgramType P>
174 auto original_predictions = simplified_program.
predict(d);
179 while(spot != simplified_program.
Tree.end_post())
184 if (spot.node->data.get_prob_change() > 0
199 float threshold = 1e-5;
201 float best_distance = threshold;
202 tree<Node> best_branch;
203 for (
const auto& cand : res.value()) {
205 const tree<Node> original_branch(spot);
206 const tree<Node> simplified_branch(cand);
212 simplified_program.
Tree.erase_children(spot);
214 spot = simplified_program.
Tree.move_ontop(spot, simplified_branch.begin());
216 auto new_predictions = simplified_program.
predict(d);
218 float diff = (original_predictions.template cast<float>() - new_predictions.template cast<float>()).square().mean();
220 if (diff < best_distance) {
221 best_distance = diff;
226 simplified_program.
Tree.erase_children(spot);
227 spot = simplified_program.
Tree.move_ontop(spot, original_branch.begin());
229 if (best_distance < threshold) {
232 simplified_program.
Tree.erase_children(spot);
234 const tree<Node> best_branch_copy(best_branch);
237 spot = simplified_program.
Tree.move_ontop(spot, best_branch_copy.begin());
247 program.
Tree = simplified_program.
Tree;
249 return simplified_program;
255 template<ProgramType P>
258 const tree<Node> tree_copy(spot);
264 auto hashes =
hash<P>(spot, d);
266 for (
size_t i = 0; i < hashes.size(); ++i)
277 log <<
"DataType,Plane,Key,Tree\n";
285 hs.
print(prefix, log);
291 template<ProgramType P>
303 ArrayXf floatClippedInput;
306 ArrayXXf inputPoint = (*spot.node).
template predict<ArrayXXf>(d);
307 floatClippedInput = Eigen::Map<ArrayXf>(inputPoint.data(), inputPoint.size()).head(
inputDim).template cast<float>();
310 ArrayXb inputPointB = (*spot.node).
template predict<ArrayXb>(d);
311 floatClippedInput = inputPointB.template cast<float>();
314 ArrayXi inputPointI = (*spot.node).
template predict<ArrayXi>(d);
315 floatClippedInput = inputPointI.template cast<float>();
318 floatClippedInput = (*spot.node).
template predict<ArrayXf>(d);
327 assert(floatClippedInput.size() ==
inputDim &&
328 "You need to pass a dataset with inputDim samples to the simplification.");
331 float floatClippedInput_mean = floatClippedInput.mean();
332 floatClippedInput = floatClippedInput - floatClippedInput_mean;
334 vector<size_t> hashes;
335 for (
size_t planeIdx = 0; planeIdx <
uniformPlanes.size(); ++planeIdx)
340 Eigen::ArrayXf projection = plane * floatClippedInput.matrix();
341 Eigen::Array<bool, Eigen::Dynamic, 1> comparison = (projection > 0);
343 size_t input_hash = 0;
344 for (
int i = 0; i < comparison.size(); ++i) {
346 input_hash |= comparison(i) ? 1 : 0;
349 hashes.push_back(input_hash);
355 template<ProgramType P>
360 int spot_size = spot.node->get_size();
363 vector<tree<Node>> matches = {};
365 vector<size_t> hashes =
hash<P>(spot, d);
367 for (
int i = 0; i < hashes.size(); ++i){
371 vector<tree<Node>> newCandidates =
equivalentExpressions[spot.node->data.ret_type].getList(i, hashes[i]);
373 if (newCandidates.size() == 0)
377 for (
const auto& cand : newCandidates) {
378 if (cand.begin().node->get_size() < spot_size) {
379 matches.push_back(cand);
380 if (++count >= 25)
break;
388 if (matches.size() > 0)
holds variable type data.
vector< map< size_t, vector< tree< Node > > > > storage
vector< tree< Node > > getList(const int &storage_n, const size_t &key)
void append(const int &storage_n, const size_t &key, const tree< Brush::Node > Tree)
void print(const string &prefix, std::ofstream &log) const
vector< size_t > keys(const int &storage_n)
HashStorage(int numPlanes=10)
string dt_to_string(DataType dt)
optional< vector< tree< Node > > > query(TreeIter &spot, const Dataset &d)
vector< size_t > hash(TreeIter &spot, const Dataset &d)
vector< MatrixXf > uniformPlanes
void index(TreeIter &spot, const Dataset &d)
Program< P > simplify_tree(Program< P > &program, const SearchSpace &ss, const Dataset &d)
void analyze_tree(Program< P > &program, const SearchSpace &ss, const Dataset &d)
void init(int hashSize, const Dataset &data, int numPlanes)
std::unordered_map< DataType, HashStorage > equivalentExpressions
void log_simplification_table(std::ofstream &log)
< nsga2 selection operator for getting the front
auto Isnt(DataType dt) -> bool
Eigen::Array< bool, Eigen::Dynamic, 1 > ArrayXb
tree< Node >::pre_order_iterator TreeIter
Eigen::Array< int, Eigen::Dynamic, 1 > ArrayXi
class holding the data for a node in a tree.
An individual program, a.k.a. model.
TreeType predict(const Dataset &d)
the standard predict function. Returns the output of the Tree directly.
int size_at(Iter &top, bool include_weight=true) const
count the size of a given subtree, optionally including the weights in weighted nodes....
Holds a search space, consisting of operations and terminals and functions, and methods to sample tha...