10 for (
const auto& arm : arms) {
20 for (
const auto& pair : arms_probs) {
22 betas[pair.first] = 2;
35 string arm = pair.first;
40 this->probabilities[arm] = std::max(std::min(prob, 1.0f), 0.001f);
44 float totalProb = 0.0f;
45 for (
const auto& pair : this->probabilities) {
46 totalProb += pair.second;
48 assert(totalProb != 0.0f &&
"Sum of probabilities is zero!");
55 std::map<string, float> probs = this->
sample_probs(
true);
57 return r.random_choice(probs);
64 betas[arm] += 1.0f-reward;
std::map< string, float > probabilities
BanditOperator(vector< string > arms)
Constructs a BanditOperator object with a vector of arms.
string choose()
Chooses an arm based on the given tree and fitness. Should call sample_probs internally.
std::map< string, float > sample_probs(bool update)
Samples the probabilities of the arms.
void update(string arm, float reward)
Updates the reward for a specific arm.
std::map< string, float > alphas
ThompsonSamplingBandit(vector< string > arms, bool dynamic=false)
std::map< string, float > betas
NSGA2 selection operator for getting the front