Feat C++ API
A feature engineering automation tool
MyRandomCARTree.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Parijat Mazumdar
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
31 #include "MyRandomCARTree.h"
32 #include <shogun/mathematics/Math.h>
33 
34 using namespace shogun;
35 
37 : CMyCARTree()
38 {
39  init();
40 }
41 
43 {
44 }
45 
47 {
48  REQUIRE(size>0, "Subset size should be greater than 0. %d supplied!\n",size)
49  m_randsubset_size=size;
50 }
51 
52 index_t CMyRandomCARTree::compute_best_attribute(const SGMatrix<float64_t>& mat, const SGVector<float64_t>& weights, CDenseLabels* labels,
53  SGVector<float64_t>& left, SGVector<float64_t>& right, SGVector<bool>& is_left_final, index_t &num_missing_final, index_t &count_left,
54  index_t &count_right, float64_t& IG, index_t subset_size, const SGVector<index_t>& active_indices)
55 
56 {
57  auto num_feats = (m_pre_sort) ? mat.num_cols : mat.num_rows;
58 
59  // if subset size is not set choose sqrt(num_feats) by default
60  if (m_randsubset_size==0)
61  m_randsubset_size = std::sqrt((float64_t)num_feats);
62  subset_size=m_randsubset_size;
63 
64  REQUIRE(subset_size<=num_feats, "The Feature subset size(set %d) should be less than"
65  " or equal to the total number of features(%d here).\n",subset_size,num_feats)
66 
67  return CMyCARTree::compute_best_attribute(mat,weights,labels,left,right,is_left_final,num_missing_final,count_left,count_right, IG, subset_size, active_indices);
68 
69 }
70 
72 {
73  CMyRandomCARTree* clone_obj = new CMyRandomCARTree();
74 
75  SG_REF(clone_obj);
76 
77  if(!clone_obj->clone_parameters(this))
78  {
79  SG_UNREF(clone_obj);
80  delete clone_obj;
81  return NULL;
82  }
83 
84  return clone_obj;
85 }
86 
88 {
90 
91  SG_ADD(&m_randsubset_size,"m_randsubset_size", "random features subset size", MS_NOT_AVAILABLE);
92 }
virtual int32_t compute_best_attribute(const SGMatrix< float64_t > &mat, const SGVector< float64_t > &weights, CLabels *labels, SGVector< float64_t > &left, SGVector< float64_t > &right, SGVector< bool > &is_left_final, int32_t &num_missing, int32_t &count_left, int32_t &count_right, float64_t &IG, int32_t subset_size=0, const SGVector< int32_t > &active_indices=SGVector< index_t >())
Definition: MyCARTree.cc:570
This class implements the randomized CART algorithm used in the tree-growing process of candidate trees i...
virtual index_t compute_best_attribute(const SGMatrix< float64_t > &mat, const SGVector< float64_t > &weights, CDenseLabels *labels, SGVector< float64_t > &left, SGVector< float64_t > &right, SGVector< bool > &is_left_final, index_t &num_missing, index_t &count_left, index_t &count_right, float64_t &IG, index_t subset_size=0, const SGVector< index_t > &active_indices=SGVector< index_t >())
void set_feature_subset_size(index_t size)
CMyRandomCARTree * clone()