15 name =
"fuzzy_fixed_split";
20 threshold_set =
false;
28 name =
"fuzzy_fixed_split_c";
33 threshold_set =
false;
43 x1 = state.
pop<T>().
template cast<float>();
68 state.copy_to_host(x.data(), (state.idx[
'f']-1)*state.N);
74 state.copy_to_host(x.data(), (state.idx[
'c']-1)*state.N);
83 GPU_FuzzyFixedSplit(state.dev_f, state.dev_b, state.idx[
'f'],
86 GPU_FuzzyFixedSplit(state.dev_c, state.dev_b, state.idx[
'c'],
96 state.
push<
bool>(
"(" + state.
popStr<T>() +
"<" +
99 state.
push<
bool>(
"(" + state.
popStr<T>() +
"==" +
121 for (
unsigned i = 0;
i < x.size(); ++
i)
124 vector<float> unique_classes =
unique(y);
125 vector<int> idx(s.size());
126 std::iota(idx.begin(),idx.end(), 0);
127 Map<ArrayXi> midx(idx.data(),idx.size());
140 float best_score = 0;
141 vector<float> neg_scores;
142 vector<float> thresholds;
149 for (
unsigned i =0;
i<s.size()-1; ++
i)
157 val = (s.at(
i) + s.at(
i+1)) / 2;
159 split_idx = (x < val).select(midx,-midx-1);
164 split_idx = (x == val).select(midx,-midx-1);
167 vector<float> d1, d2;
168 for (
unsigned j=0; j< split_idx.size(); ++j)
171 d2.push_back(y(-1-split_idx(j)));
173 d1.push_back(y(split_idx(j)));
175 if (d1.empty() || d2.empty())
182 Map<VectorXf> map_d1(d1.data(), d1.size());
183 Map<VectorXf> map_d2(d2.data(), d2.size());
186 score = gain(map_d1, map_d2, classification,
189 neg_scores.push_back(-score);
190 thresholds.push_back(val);
192 if (score < best_score ||
i == 0)
197 if (thresholds.empty())
207 int index = distance(thresholds.begin(),
208 find(thresholds.begin(), thresholds.end(),
218 const VectorXf& rsplit,
219 bool classification, vector<float> unique_classes)
221 float lscore, rscore, score;
224 lscore = gini_impurity_index(lsplit, unique_classes);
225 rscore = gini_impurity_index(rsplit, unique_classes);
228 score = (lscore*float(lsplit.size()) +
229 rscore*float(rsplit.size()))
230 /(
float(lsplit.size()) + float(rsplit.size()));
234 lscore =
variance(lsplit.array())/float(lsplit.size());
235 rscore =
variance(rsplit.array())/float(rsplit.size());
236 score = lscore + rscore;
244 const VectorXf& classes, vector<float> uc)
246 VectorXf class_weights(uc.size());
248 class_weights(
static_cast<Eigen::Index
>(c)) = 0;
249 class_weights(
static_cast<Eigen::Index
>(c)) = float(
250 (classes.cast<
int>().array() ==
int(c)).count()
256 float gini = 1 - class_weights.dot(class_weights);
data holding X, y, and Z data
void eval_eqn(State &state)
Evaluates the node symbolically.
void set_threshold(ArrayXf &x, VectorXf &y, bool classification)
Uses a heuristic to set a splitting threshold.
float gain(const VectorXf &lsplit, const VectorXf &rsplit, bool classification=false, vector< float > unique_classes=vector< float >())
returns the gain of a split
void evaluate(const Data &data, State &state)
Evaluates the node and updates the state.
NodeFuzzyFixedSplit * clone_impl() const override
NodeFuzzyFixedSplit * rnd_clone_impl() const override
float gini_impurity_index(const VectorXf &classes, vector< float > uc)
Gini impurity of the class labels in `classes`
T random_choice(const vector< T > &v)
vector< T > unique(vector< T > w)
returns unique elements in vector
std::string to_string(const T &value)
template function to convert objects to string for logging
float variance(const ArrayXf &v, float mean)
calculate variance when mean provided
contains various types of State actually used by feat
Eigen::Array< T, Eigen::Dynamic, 1 > pop()
void push(Eigen::Array< T, Eigen::Dynamic, 1 > value)