11 for (
const auto& arm : arms) {
22 for (
const auto& pair : arms_probs) {
24 betas[pair.first] = 2;
49 float alpha, beta, X, Y, prob;
57 boost::math::gamma_distribution<> gammaX(alpha);
58 boost::math::gamma_distribution<> gammaY(beta);
63 prob = X/(X+Y+0.001f);
66 this->probabilities[arm] = std::max(prob, 0.01f);
70 float totalProb = 0.0f;
71 for (
const auto& pair : this->probabilities) {
72 totalProb += pair.second;
74 assert(totalProb != 0.0f &&
"Sum of probabilities is zero!");
84 return r.random_choice(probs);
92 betas[arm] += 1.0f-reward;
BanditOperator(vector< T > arms)
Constructs a BanditOperator object with a vector of arms.
std::map< T, float > probabilities
std::map< T, float > alphas
ThompsonSamplingBandit(vector< T > arms, bool dynamic=false)
std::map< T, float > sample_probs(bool update)
Samples the probabilities of the arms.
std::map< T, float > betas
void update(T arm, float reward, VectorXf &context)
Updates the reward for a specific arm.
T choose(const VectorXf &context)
Chooses an arm based on the given context. Should call sample_probs internally.
NSGA-II selection operator for getting the front.