Feat C++ API
A feature engineering automation tool
nsga2.cc
Go to the documentation of this file.
1 /* FEAT
2 copyright 2017 William La Cava
3 license: GNU/GPL v3
4 */
5 
6 #include "nsga2.h"
7 
8 namespace FT{
9 
10  namespace Sel{
12 
16  NSGA2::NSGA2(bool surv){ name = "nsga2"; survival = surv; };
17 
19 
20  size_t NSGA2::tournament(vector<Individual>& pop, size_t i, size_t j) const
21  {
22  Individual& ind1 = pop.at(i);
23  Individual& ind2 = pop.at(j);
24 
25  int flag = ind1.check_dominance(ind2);
26 
27  if (flag == 1) // ind1 dominates ind2
28  return i;
29  else if (flag == -1) // ind2 dominates ind1
30  return j;
31  else if (ind1.crowd_dist > ind2.crowd_dist)
32  return i;
33  else if (ind2.crowd_dist > ind1.crowd_dist)
34  return j;
35  else
36  return i;
37  }
38 
39  vector<size_t> NSGA2::select(Population& pop,
40  const Parameters& params, const Data& d)
41  {
42  /* Selection using Pareto tournaments.
43  *
44  * Input:
45  *
46  * pop: population of programs.
47  * params: parameters.
48  * r: random number generator
49  *
50  * Output:
51  *
52  * selected: vector of indices corresponding to pop that are selected.
53  * modifies individual ranks, objectives and dominations.
54  */
55  vector<size_t> pool(pop.size());
56  std::iota(pool.begin(), pool.end(), 0);
57  // if this is first generation, just return indices to pop
58  if (params.current_gen==0)
59  return pool;
60 
61  vector<size_t> selected(pop.size());
62 
63  for (int i = 0; i < pop.size(); ++i)
64  {
65  size_t winner = tournament(pop.individuals, r.random_choice(pool),
66  r.random_choice(pool));
67  selected.push_back(winner);
68  }
69  return selected;
70  }
71 
72  vector<size_t> NSGA2::survive(Population& pop,
73  const Parameters& params, const Data& d)
74  {
75  /* Selection using the survival scheme of NSGA-II.
76  *
77  * Input:
78  *
79  * pop: population of programs.
80  * params: parameters.
81  * r: random number generator
82  *
83  * Output:
84  *
85  * selected: vector of indices corresponding to pop that are selected.
86  * modifies individual ranks, objectives and dominations.
87  */
88 
89  // set objectives
90  #pragma omp parallel for
91  for (unsigned int i=0; i<pop.size(); ++i)
92  pop.individuals.at(i).set_obj(params.objectives);
93 
94  // fast non-dominated sort
95  fast_nds(pop.individuals);
96 
97  // Push back selected individuals until full
98  vector<size_t> selected;
99  int i = 0;
100  while ( selected.size() + front.at(i).size() < params.pop_size)
101  {
102  std::vector<int>& Fi = front.at(i); // indices in front i
103  crowding_distance(pop,i); // calculate crowding in Fi
104 
105  for (int j = 0; j < Fi.size(); ++j) // Pt+1 = Pt+1 U Fi
106  selected.push_back(Fi.at(j));
107 
108  ++i;
109  }
110 
111  crowding_distance(pop,i); // calculate crowding in final front to include
112  std::sort(front.at(i).begin(),front.at(i).end(),sort_n(pop));
113 
114  const int extra = params.pop_size - selected.size();
115  for (int j = 0; j < extra; ++j) // Pt+1 = Pt+1 U Fi[1:N-|Pt+1|]
116  selected.push_back(front.at(i).at(j));
117 
118  return selected;
119  }
120 
121  void NSGA2::fast_nds(vector<Individual>& individuals)
122  {
123  front.resize(1);
124  front.at(0).clear();
125  //std::vector< std::vector<int> > F(1);
126  #pragma omp parallel for
127  for (int i = 0; i < individuals.size(); ++i) {
128 
129  std::vector<unsigned int> dom;
130  int dcount = 0;
131 
132  Individual& p = individuals.at(i);
133  // p.dcounter = 0;
134  // p.dominated.clear();
135 
136  for (int j = 0; j < individuals.size(); ++j) {
137 
138  Individual& q = individuals.at(j);
139 
140  int compare = p.check_dominance(q);
141  if (compare == 1) { // p dominates q
142  //p.dominated.push_back(j);
143  dom.push_back(j);
144  } else if (compare == -1) { // q dominates p
145  //p.dcounter += 1;
146  dcount += 1;
147  }
148  }
149 
150  #pragma omp critical
151  {
152  p.dcounter = dcount;
153  p.dominated.clear();
154  p.dominated = dom;
155 
156 
157  if (p.dcounter == 0) {
158  p.set_rank(1);
159  front.at(0).push_back(i);
160  }
161  }
162 
163  }
164 
165  // using OpenMP can have different orders in the front.at(0)
166  // so let's sort it so that the algorithm is deterministic
167  // given a seed
168  std::sort(front.at(0).begin(), front.at(0).end());
169 
170  int fi = 1;
171  while (front.at(fi-1).size() > 0) {
172 
173  std::vector<int>& fronti = front.at(fi-1);
174  std::vector<int> Q;
175  for (int i = 0; i < fronti.size(); ++i) {
176 
177  Individual& p = individuals.at(fronti.at(i));
178 
179  for (int j = 0; j < p.dominated.size() ; ++j) {
180 
181  Individual& q = individuals.at(p.dominated.at(j));
182  q.dcounter -= 1;
183 
184  if (q.dcounter == 0) {
185  q.set_rank(fi+1);
186  Q.push_back(p.dominated.at(j));
187  }
188  }
189  }
190 
191  fi += 1;
192  front.push_back(Q);
193  }
194 
195  }
196 
198  {
199  std::vector<int> F = front.at(fronti);
200  if (F.size() == 0 ) return;
201 
202  const int fsize = F.size();
203 
204  for (int i = 0; i < fsize; ++i)
205  pop.individuals.at(F.at(i)).crowd_dist = 0;
206 
207 
208  const int limit = pop.individuals.at(0).obj.size();
209  for (int m = 0; m < limit; ++m) {
210 
211  std::sort(F.begin(), F.end(), comparator_obj(pop,m));
212 
213  // in the paper dist=INF for the first and last, in the code
214  // this is only done to the first one or to the two first when size=2
215  pop.individuals.at(F.at(0)).crowd_dist = std::numeric_limits<float>::max();
216  if (fsize > 1)
217  pop.individuals.at(F.at(fsize-1)).crowd_dist = std::numeric_limits<float>::max();
218 
219  for (int i = 1; i < fsize-1; ++i)
220  {
221  if (pop.individuals.at(F.at(i)).crowd_dist != std::numeric_limits<float>::max())
222  { // crowd over obj
223  pop.individuals.at(F.at(i)).crowd_dist +=
224  (pop.individuals.at(F.at(i+1)).obj.at(m) - pop.individuals.at(F.at(i-1)).obj.at(m))
225  / (pop.individuals.at(F.at(fsize-1)).obj.at(m) - pop.individuals.at(F.at(0)).obj.at(m));
226  }
227  }
228  }
229  }
230  }
231 
232 }
233 
data holding X, y, and Z data
Definition: data.h:42
individual programs in the population
Definition: individual.h:31
int check_dominance(const Individual &b) const
check whether this dominates b.
Definition: individual.cc:898
vector< unsigned int > dominated
individual indices this dominates
Definition: individual.h:47
unsigned int dcounter
number of individuals this dominates
Definition: individual.h:46
float crowd_dist
crowding distance on the Pareto front
Definition: individual.h:49
void set_rank(unsigned r)
setting and getting from individuals vector
Definition: individual.cc:90
T random_choice(const vector< T > &v)
Definition: rnd.h:73
T pop(vector< T > *v)
Definition: auto_backprop.h:49
static Rnd & r
Definition: rnd.h:135
main Feat namespace
Definition: data.cc:13
int i
Definition: params.cc:552
holds the hyperparameters for Feat.
Definition: params.h:25
int pop_size
population size
Definition: params.h:28
int current_gen
holds current generation
Definition: params.h:30
vector< string > objectives
Pareto objectives.
Definition: params.h:52
Defines a population of programs and functions for constructing them.
Definition: population.h:28
sort based on objective m
Definition: nsga2.h:63
sort based on rank, breaking ties with crowding distance
Definition: nsga2.h:46
NSGA2(bool surv)
Definition: nsga2.cc:16
void fast_nds(vector< Individual > &)
Definition: nsga2.cc:121
vector< size_t > survive(Population &pop, const Parameters &p, const Data &d)
survival according to the survival scheme of NSGA-II
Definition: nsga2.cc:72
vector< size_t > select(Population &pop, const Parameters &p, const Data &d)
selection according to the survival scheme of NSGA-II
Definition: nsga2.cc:39
size_t tournament(vector< Individual > &pop, size_t i, size_t j) const
Definition: nsga2.cc:20
vector< vector< int > > front
Definition: nsga2.h:34
void crowding_distance(Population &, int)
Definition: nsga2.cc:197