14    template<
typename T> 
requires same_as<typename T::Scalar, bool>
 
   18    template<
typename T> 
requires same_as<typename T::Scalar, bJet>
 
   21        for (
int i = 0; i< x.size(); ++i)
 
 
   25    template<
typename T> 
requires same_as<typename T::Scalar, float>
 
   27        return (x >= threshold); 
 
 
   29    template<
typename T> 
requires same_as<typename T::Scalar, fJet>
 
   33            x.begin(), x.end(), ret.begin(), 
 
   34            [&](
const auto& e){return e >= threshold;}
 
 
   38    template<
typename T> 
requires same_as<typename T::Scalar, int>
 
   40        return (x == threshold); 
 
 
   43    template<
typename T> 
requires same_as<typename T::Scalar, iJet>
 
   48            x.begin(), x.end(), ret.begin(), 
 
   49            [&](
const auto& e){return e == threshold;}
 
 
   54    float gain(
const ArrayXf& lsplit, 
const ArrayXf& rsplit, 
bool classification, 
 
   61    tuple<float,float> 
best_threshold(
const T& x, 
const ArrayXf& y, 
bool classification)
 
   75        float best_thresh, best_score = 
MAX_FLT;
 
   80            unique_classes = unique(y);
 
   84        for (
const auto thresh: all_thresholds)
 
   89            tie (L_idx, R_idx) = Util::mask_to_indices(mask);
 
   92            const ArrayXf& lhs = y(L_idx); 
 
   93            const ArrayXf& rhs = y(R_idx); 
 
   95            if (lhs.size() == 0 || rhs.size() == 0)
 
   99            float score = 
gain(lhs, rhs, classification, unique_classes);
 
  101            if (score < best_score || i == 0)
 
  104                best_thresh = thresh;
 
  109        best_thresh = std::isinf(best_thresh)? 
 
  110            0 : std::isnan(best_thresh)? 
 
  113        return make_tuple(best_thresh, best_score);
 
 
  120        DataType DT = DataTypeEnum<T>::value;
 
  137        for (
const auto& key : keys) 
 
  139            float tmp_thresh, score;
 
  143            if (score < best_score | i == 0)
 
  147                threshold = tmp_thresh;
 
  151        auto tmp = std::make_tuple(feature, threshold, best_score);
 
  152        results.push_back(std::make_tuple(feature, threshold, best_score));
 
 
  155    template<
typename Ts,  std::size_t... Is> 
 
  159        using entry = tuple<string, float, float>;
 
  160        auto compare = [](
const entry& a, 
const entry& b){ 
 
  161            return (std::get<2>(a) < std::get<2>(b)); 
 
  168        auto best = std::ranges::min_element(results, compare);
 
 
  177        T result(mask.size());
 
  180        tie (L_idx, R_idx) = Util::mask_to_indices(mask);
 
  181        result(L_idx) = child_outputs.at(0);
 
  182        result(R_idx) = child_outputs.at(1);
 
 
 
  190template<NodeType NT, 
typename S, 
bool Fit> 
 
  191struct Operator<NT, S, Fit, enable_if_t<
is_in_v<NT, 
NodeType::SplitOn, NodeType::SplitBest>>> 
 
  196    using W = 
typename S::WeightType;
 
  199    template <std::
size_t N>
 
  203    static constexpr Function<NT> 
F{};
 
  211        if constexpr (NT==NodeType::SplitOn)
 
  212            sib = sib->next_sibling;
 
  214        for (
int i = 0; i < 2; ++i)
 
  216            if (d.at(i).get_n_samples() > 0)
 
  219                    child_outputs.at(i) = sib->fit<arg_type>(d.at(i));
 
  221                    child_outputs.at(i) = sib->predict<arg_type>(d.at(i), weights);
 
  223            sib = sib->next_sibling;
 
  225        return child_outputs;
 
 
  229        auto& threshold = tn.data.W;
 
  232        if constexpr (NT == NodeType::SplitOn)
 
  242            if (tn.data.get_keep_split_feature() && tn.data.get_feature()!=
"")
 
  246                auto values = d[tn.data.get_feature()];
 
  249                if (std::holds_alternative<ArrayXf>(values))
 
  251                else if (std::holds_alternative<ArrayXi>(values))
 
  253                else if (std::holds_alternative<ArrayXb>(values))
 
  260                tn.data.set_feature(feature);
 
 
  270        const auto& threshold = tn.data.W;
 
  271        const auto& feature = tn.data.get_feature();
 
  280        else if constexpr (NT==NodeType::SplitBest)
 
  283            auto split_feature = tn.first_child->predict<
FirstArg>(d, weights);
 
  289        auto child_outputs = 
get_kids(data_splits, tn, weights);
 
 
 
holds variable type data.
bool classification
whether this is a classification problem
DataType get_feature_type(const string &name) const
int get_n_samples() const
std::unordered_map< DataType, vector< string > > features_of_type
map from data types to features having that type.
ArrayXf y
length N array, the target label
std::array< Dataset, 2 > split(const ArrayXb &mask) const
class tree_node_< Node > TreeNode
Eigen::Array< bool, Eigen::Dynamic, 1 > ArrayXb
ArrayXb threshold_mask(const T &x, const float &threshold)
Applies a learned threshold to a feature, returning a mask.
auto get_best_thresholds(const Dataset &d, std::index_sequence< Is... >)
vector< float > get_thresholds(const T &x)
float gain(const ArrayXf &lsplit, const ArrayXf &rsplit, bool classification, vector< float > unique_classes)
tuple< float, float > best_threshold(const T &x, const ArrayXf &y, bool classification)
void get_best_threshold_by_type(const Dataset &d, auto &results)
float gini_impurity_index(const ArrayXf &classes, const vector< float > &uc)
T stitch(array< T, 2 > &child_outputs, const ArrayXb &mask)
Stitches together outputs from left or right child based on threshold.
tuple< string, float > get_best_variable_and_threshold(const Dataset &d, TreeNode &tn)
static constexpr bool is_in_v
typename S::RetType RetType
typename S::ArgTypes ArgTypes
static constexpr Function< NT > F
RetType fit(const Dataset &d, TreeNode &tn) const
static constexpr size_t ArgCount
RetType eval(const Dataset &d, TreeNode &tn, const W **weights=nullptr) const
typename S::template NthType< N > NthType
array< RetType, 2 > get_kids(const array< Dataset, 2 > &d, TreeNode &tn, const W **weights=nullptr) const
typename S::FirstArg FirstArg
RetType predict(const Dataset &d, TreeNode &tn, const W **weights=nullptr) const