12 using FeatTypes = tuple<ArrayXf,ArrayXi,ArrayXb>;
13 constexpr auto size = std::tuple_size<FeatTypes>::value;
15 return std::make_tuple(feature, threshold);
21 vector<float> thresholds;
22 for (
const auto& val :
unique(x))
23 thresholds.push_back(val);
27 vector<float> thresholds;
28 for (
const auto& val :
unique(x))
29 thresholds.push_back(val.a);
34 vector<float> thresholds;
36 for (
unsigned i =0; i<s.size()-1; ++i)
38 thresholds.push_back((s.at(i) + s.at(i+1))/2.0);
43 vector<float> thresholds;
45 for (
unsigned i =0; i<s.size()-1; ++i)
47 thresholds.push_back((s.at(i).a + s.at(i+1).a)/
float(2.0));
56 [&](
const auto& arg) ->
ArrayXb {
57 using T = std::decay_t<
decltype(arg)>;
58 if constexpr (T::NumDimensions == 1)
61 return ArrayXb::Constant(arg.size(),
true);
66float gain(
const ArrayXf& lsplit,
67 const ArrayXf& rsplit,
68 bool classification, vector<float> unique_classes)
70 float lscore, rscore, score;
77 score = (lscore*float(lsplit.size()) +
78 rscore*float(rsplit.size()))
79 /(
float(lsplit.size()) +
float(rsplit.size()));
83 lscore =
variance(lsplit)/float(lsplit.size());
84 rscore =
variance(rsplit)/float(rsplit.size());
87 score = lscore + rscore;
94 const vector<float>& unique_classes)
96 vector<float> class_weights;
97 for (
auto c : unique_classes){
98 class_weights.push_back(
99 float( (classes.cast<
int>() ==
int(c)).count())/classes.size()
103 auto cw = VectorXf::Map(class_weights.data(), class_weights.size());
104 float gini = 1 - cw.dot(cw);
Holds variable-type data.
class tree_node_< Node > TreeNode
std::variant< ArrayXb, ArrayXi, ArrayXf, ArrayXXb, ArrayXXi, ArrayXXf, TimeSeriesb, TimeSeriesi, TimeSeriesf, ArrayXbJet, ArrayXiJet, ArrayXfJet, ArrayXXbJet, ArrayXXiJet, ArrayXXfJet, TimeSeriesbJet, TimeSeriesiJet, TimeSeriesfJet > State
Defines the possible types of data flowing through nodes.
vector< float > get_thresholds< ArrayXb >(const ArrayXb &x)
tuple< string, float > get_best_variable_and_threshold(const Dataset &d, TreeNode &tn)
vector< float > get_thresholds< ArrayXfJet >(const ArrayXfJet &x)
ArrayXb threshold_mask< State >(const State &x, const float &threshold)
float gain(const ArrayXf &lsplit, const ArrayXf &rsplit, bool classification, vector< float > unique_classes)
float gini_impurity_index(const ArrayXf &classes, const vector< float > &uc)
vector< float > get_thresholds< ArrayXi >(const ArrayXi &x)
vector< float > get_thresholds< ArrayXf >(const ArrayXf &x)
vector< float > get_thresholds< ArrayXiJet >(const ArrayXiJet &x)
auto get_best_thresholds(const Dataset &d, std::index_sequence< Is... >)
ArrayXb threshold_mask(const T &x, const float &threshold)
Applies a learned threshold to a feature, returning a mask.
vector< float > get_thresholds< ArrayXbJet >(const ArrayXbJet &x)
vector< T > unique(vector< T > w)
Returns the unique elements in a vector.
float variance(const ArrayXf &v)
Calculates the variance.
Eigen::Array< bool, Eigen::Dynamic, 1 > ArrayXb
Eigen::Array< fJet, Eigen::Dynamic, 1 > ArrayXfJet
Eigen::Array< int, Eigen::Dynamic, 1 > ArrayXi
Eigen::Array< bJet, Eigen::Dynamic, 1 > ArrayXbJet
Eigen::Array< iJet, Eigen::Dynamic, 1 > ArrayXiJet