Feat C++ API
A feature engineering automation tool
MyRandomForest.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Parijat Mazumdar
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
31 #include "MyRandomForest.h"
32 #include "MyRandomCARTree.h"
33 
34 using namespace shogun;
35 
37 : CBaggingMachine()
38 {
39  m_machine = new CMyRandomCARTree();
40  m_weights=SGVector<float64_t>();
41 
42  SG_ADD(&m_weights,"m_weights","weights",MS_NOT_AVAILABLE)
43 }
44 
46 {
47 }
48 
49 void CMyRandomForest::set_weights(SGVector<float64_t> weights)
50 {
51  m_weights=weights;
52 }
53 
54 SGVector<float64_t> CMyRandomForest::get_weights() const
55 {
56  return m_weights;
57 }
58 
59 void CMyRandomForest::set_feature_types(SGVector<bool> ft)
60 {
61  REQUIRE(m_machine,"m_machine is NULL. It is expected to be RandomCARTree\n")
62  dynamic_cast<CMyRandomCARTree*>(m_machine)->set_feature_types(ft);
63 }
64 
65 SGVector<bool> CMyRandomForest::get_feature_types() const
66 {
67  REQUIRE(m_machine,"m_machine is NULL. It is expected to be RandomCARTree\n")
68  return dynamic_cast<CMyRandomCARTree*>(m_machine)->get_feature_types();
69 }
70 
72 {
73  REQUIRE(m_machine,"m_machine is NULL. It is expected to be RandomCARTree\n")
74  return dynamic_cast<CMyRandomCARTree*>(m_machine)->get_machine_problem_type();
75 }
76 
78 {
79  REQUIRE(m_machine,"m_machine is NULL. It is expected to be RandomCARTree\n")
80  dynamic_cast<CMyRandomCARTree*>(m_machine)->set_machine_problem_type(mode);
81 }
82 
83 void CMyRandomForest::set_num_random_features(int32_t rand_featsize)
84 {
85  REQUIRE(m_machine,"m_machine is NULL. It is expected to be RandomCARTree\n")
86  REQUIRE(rand_featsize>0,"feature subset size should be greater than 0\n")
87 
88  dynamic_cast<CMyRandomCARTree*>(m_machine)->set_feature_subset_size(rand_featsize);
89 }
90 
92 {
93  REQUIRE(m_machine,"m_machine is NULL. It is expected to be RandomCARTree\n")
94  return dynamic_cast<CMyRandomCARTree*>(m_machine)->get_feature_subset_size();
95 }
96 
97 void CMyRandomForest::set_machine_parameters(CMachine* m, SGVector<index_t> idx)
98 {
99  REQUIRE(m,"Machine supplied is NULL\n")
100  REQUIRE(m_machine,"Reference Machine is NULL\n")
101 
102  CMyRandomCARTree* tree=dynamic_cast<CMyRandomCARTree*>(m);
103 
104  SGVector<float64_t> weights(idx.vlen);
105 
106  if (m_weights.vlen==0)
107  {
108  weights.fill_vector(weights.vector,weights.vlen,1.0);
109  }
110  else
111  {
112  for (int32_t i=0;i<idx.vlen;i++)
113  weights[i]=m_weights[idx[i]];
114  }
115 
116  tree->set_weights(weights);
118  // equate the machine problem types - cloning does not do this
119  tree->set_machine_problem_type(dynamic_cast<CMyRandomCARTree*>(m_machine)->get_machine_problem_type());
120 
121 }
122 
123 bool CMyRandomForest::train_machine(CFeatures* data)
124 {
125  if (data)
126  {
127  SG_REF(data);
128  SG_UNREF(m_features);
129  m_features = data;
130  }
131 
132  REQUIRE(m_features, "Training features not set!\n");
133 
134  dynamic_cast<CMyRandomCARTree*>(m_machine)->pre_sort_features(m_features, m_sorted_transposed_feats, m_sorted_indices);
135 
136  return CBaggingMachine::train_machine();
137 }
138 
140 {
141  size_t num_features = get_feature_types().size();
142  vector<double> importances(num_features, 0.0); //set to zero for all attributes
143 
144  for (int32_t i = 0; i < m_num_bags; ++i)
145  {
146  CMyRandomCARTree* m = dynamic_cast<CMyRandomCARTree*>(m_bags->get_element(i));
147 
148  vector<double> m_imp = m->feature_importances();
149 
150  for(size_t j = 0; j < num_features; j++)
151  importances[j] += m_imp[j];
152  }
153 
154  for(size_t i = 0; i < num_features; i++)
155  importances[i] += m_num_bags;
156 
157  return importances;
158 }
159 
160 void CMyRandomForest::set_probabilities(CLabels* labels, CFeatures* data)
161 {
162  SGMatrix<float64_t> output = apply_outputs_without_combination(data);
163 
164  CMeanRule* mean_rule = new CMeanRule();
165 
166  SGVector<float64_t> probabilities = mean_rule->combine(output);
167 
168  labels->set_values(probabilities);
169 
170  SG_UNREF(mean_rule);
171 }
void set_weights(SGVector< float64_t > w)
Definition: MyCARTree.cc:208
std::vector< double > feature_importances()
Definition: MyCARTree.cc:1580
void set_machine_problem_type(EProblemType mode)
Definition: MyCARTree.cc:100
void set_sorted_features(SGMatrix< float64_t > &sorted_feats, SGMatrix< index_t > &sorted_indices)
Definition: MyCARTree.cc:324
This class implements randomized CART algorithm used in the tree growing process of candidate trees i...
SGVector< bool > get_feature_types() const
std::vector< double > feature_importances()
void set_probabilities(CLabels *labels, CFeatures *data=NULL)
virtual bool train_machine(CFeatures *data=NULL)
virtual void set_machine_parameters(CMachine *m, SGVector< index_t > idx)
int32_t get_num_random_features() const
SGVector< float64_t > m_weights
virtual EProblemType get_machine_problem_type() const
SGMatrix< float64_t > m_sorted_transposed_feats
void set_num_random_features(int32_t rand_featsize)
void set_feature_types(SGVector< bool > ft)
SGVector< float64_t > get_weights() const
void set_machine_problem_type(EProblemType mode)
SGMatrix< index_t > m_sorted_indices
void set_weights(SGVector< float64_t > weights)
int i
Definition: params.cc:552