40 x1 = state.
pop<T>().
template cast<float>();
59 state.copy_to_host(x.data(), (state.idx[
'f']-1)*state.N);
65 state.copy_to_host(x.data(), (state.idx[
'c']-1)*state.N);
74 GPU_Split(state.dev_f, state.dev_b, state.idx[
'f'],
77 GPU_Split(state.dev_c, state.dev_b, state.idx[
'c'],
87 state.
push<
bool>(
"(" + state.
popStr<T>() +
"<" +
90 state.
push<
bool>(
"(" + state.
popStr<T>() +
"==" +
111 vector<float> s =
unique(x);
112 vector<float> unique_classes =
unique(y);
113 vector<int> idx(x.size());
114 std::iota(idx.begin(),idx.end(), 0);
115 Map<ArrayXi> midx(idx.data(),idx.size());
117 float best_score = 0;
124 for (
unsigned i =0;
i<s.size()-1; ++
i)
132 val = (s.at(
i) + s.at(
i+1)) / 2;
133 split_idx = (x < val).select(midx,-midx-1);
138 split_idx = (x == val).select(midx,-midx-1);
144 vector<float> d1, d2;
145 for (
unsigned j=0; j< split_idx.size(); ++j)
148 d2.push_back(y(-1-split_idx(j)));
150 d1.push_back(y(split_idx(j)));
152 if (d1.empty() || d2.empty())
155 Map<VectorXf> map_d1(d1.data(), d1.size());
156 Map<VectorXf> map_d2(d2.data(), d2.size());
159 score = gain(map_d1, map_d2, classification,
162 if (score < best_score ||
i == 0)
181 const VectorXf& rsplit,
182 bool classification, vector<float> unique_classes)
184 float lscore, rscore, score;
187 lscore = gini_impurity_index(lsplit, unique_classes);
188 rscore = gini_impurity_index(rsplit, unique_classes);
191 score = (lscore*float(lsplit.size()) +
192 rscore*float(rsplit.size()))
193 /(
float(lsplit.size()) + float(rsplit.size()));
197 lscore =
variance(lsplit.array())/float(lsplit.size());
198 rscore =
variance(rsplit.array())/float(rsplit.size());
199 score = lscore + rscore;
209 VectorXf class_weights(uc.size());
211 class_weights(
static_cast<Eigen::Index
>(c)) = 0;
212 class_weights(
static_cast<Eigen::Index
>(c)) = float(
213 (classes.cast<
int>().array() ==
int(c)).count()
219 float gini = 1 - class_weights.dot(class_weights);
data holding X, y, and Z data
NodeSplit * rnd_clone_impl() const override
NodeSplit * clone_impl() const override
float gain(const VectorXf &lsplit, const VectorXf &rsplit, bool classification=false, vector< float > unique_classes=vector< float >())
returns the gain of a split
float gini_impurity_index(const VectorXf &classes, vector< float > uc)
gini impurity of classes in classes
void evaluate(const Data &data, State &state)
Evaluates the node and updates the state states.
void set_threshold(ArrayXf &x, VectorXf &y, bool classification)
Uses a heuristic to set a splitting threshold.
void eval_eqn(State &state)
Evaluates the node symbolically.
void GPU_Split(float *xf, bool *xb, size_t idxf, size_t idxb, size_t N, float threshold)
ArrayXb isinf(const ArrayXf &x)
returns true for elements of x that are infinite
ArrayXb isnan(const ArrayXf &x)
returns true for elements of x that are NaN
vector< T > unique(vector< T > w)
returns unique elements in vector
std::string to_string(const T &value)
template function to convert objects to string for logging
float variance(const ArrayXf &v, float mean)
calculate variance when mean provided
contains various types of State actually used by feat
Eigen::Array< T, Eigen::Dynamic, 1 > pop()
void push(Eigen::Array< T, Eigen::Dynamic, 1 > value)