26 name =
"fuzzy_split_c";
40 x1 = state.
pop<T>().
template cast<float>();
59 state.copy_to_host(x.data(), (state.idx[
'f']-1)*state.N);
65 state.copy_to_host(x.data(), (state.idx[
'c']-1)*state.N);
74 GPU_FuzzySplit(state.dev_f, state.dev_b, state.idx[
'f'],
77 GPU_FuzzySplit(state.dev_c, state.dev_b, state.idx[
'c'],
87 state.
push<
bool>(
"(" + state.
popStr<T>() +
"<" +
90 state.
push<
bool>(
"(" + state.
popStr<T>() +
"==" +
112 for (
unsigned i = 0;
i < x.size(); ++
i)
115 vector<float> unique_classes =
unique(y);
116 vector<int> idx(s.size());
117 std::iota(idx.begin(),idx.end(), 0);
118 Map<ArrayXi> midx(idx.data(),idx.size());
128 float best_score = 0;
129 vector<float> neg_scores;
130 vector<float> thresholds;
137 for (
unsigned i =0;
i<s.size()-1; ++
i)
145 val = (s.at(
i) + s.at(
i+1)) / 2;
146 split_idx = (x < val).select(midx,-midx-1);
151 split_idx = (x == val).select(midx,-midx-1);
154 vector<float> d1, d2;
155 for (
unsigned j=0; j< split_idx.size(); ++j)
158 d2.push_back(y(-1-split_idx(j)));
160 d1.push_back(y(split_idx(j)));
162 if (d1.empty() || d2.empty())
165 Map<VectorXf> map_d1(d1.data(), d1.size());
166 Map<VectorXf> map_d2(d2.data(), d2.size());
169 score = gain(map_d1, map_d2, classification,
172 neg_scores.push_back(-score);
173 thresholds.push_back(val);
176 if (score < best_score ||
i == 0)
181 if (thresholds.empty())
191 int index = distance(thresholds.begin(),
192 find(thresholds.begin(), thresholds.end(),
202 const VectorXf& rsplit,
203 bool classification, vector<float> unique_classes)
205 float lscore, rscore, score;
208 lscore = gini_impurity_index(lsplit, unique_classes);
209 rscore = gini_impurity_index(rsplit, unique_classes);
212 score = (lscore*float(lsplit.size()) +
213 rscore*float(rsplit.size()))
214 /(
float(lsplit.size()) + float(rsplit.size()));
218 lscore =
variance(lsplit.array())/float(lsplit.size());
219 rscore =
variance(rsplit.array())/float(rsplit.size());
220 score = lscore + rscore;
228 const VectorXf& classes, vector<float> uc)
230 VectorXf class_weights(uc.size());
232 class_weights(
static_cast<Eigen::Index
>(c)) = 0;
233 class_weights(
static_cast<Eigen::Index
>(c)) = float(
234 (classes.cast<
int>().array() ==
int(c)).count()
240 float gini = 1 - class_weights.dot(class_weights);
data holding X, y, and Z data
void eval_eqn(State &state)
Evaluates the node symbolically.
void evaluate(const Data &data, State &state)
Evaluates the node and updates the state.
NodeFuzzySplit * clone_impl() const override
void set_threshold(ArrayXf &x, VectorXf &y, bool classification)
Uses a heuristic to set a splitting threshold.
float gini_impurity_index(const VectorXf &classes, vector< float > uc)
Gini impurity of the class labels in classes
NodeFuzzySplit * rnd_clone_impl() const override
float gain(const VectorXf &lsplit, const VectorXf &rsplit, bool classification=false, vector< float > unique_classes=vector< float >())
returns the gain of a split
T random_choice(const vector< T > &v)
vector< T > unique(vector< T > w)
returns unique elements in vector
std::string to_string(const T &value)
template function to convert objects to string for logging
float variance(const ArrayXf &v, float mean)
Calculates the variance of v when the mean is provided
contains various types of State actually used by feat
Eigen::Array< T, Eigen::Dynamic, 1 > pop()
void push(Eigen::Array< T, Eigen::Dynamic, 1 > value)