17 map<string, float> arms_probs;
18 float prob = 1.0 /
arms.size();
19 for (
const auto& arm :
arms) {
20 arms_probs[arm] = prob;
29 vector<string> arms_names;
30 for (
const auto& pair : arms_probs) {
31 arms_names.push_back(pair.first);
40 if (
type ==
"thompson") {
42 }
else if (
type ==
"dynamic_thompson") {
44 }
else if (
type ==
"dummy") {
76 map<string, float> new_probs = this->
pbandit->sample_probs(
update);
81 for (
auto& pair : new_probs) {
82 if (pair.second <= 0.0f) {
95 this->
pbandit->update(arm, reward);
#define HANDLE_ERROR_THROW(err)
NSGA2 selection operator for getting the front
void update(string arm, float reward)
Updates the bandit's state based on the chosen arm and the received reward.
string get_type()
Gets the type of the bandit.
vector< string > get_arms()
Gets the arms of the bandit.
void set_type(string type)
Sets the type of the bandit.
void set_arms(vector< string > arms)
Sets the arms of the bandit.
std::map< string, float > probabilities
void set_probs(map< string, float > arms_probs)
Sets the probabilities associated with each arm.
string choose()
Selects an arm.
map< string, float > sample_probs(bool update=false)
Samples the probabilities associated with each arm using the policy.
void set_bandit()
Sets the bandit operator (policy).
std::shared_ptr< BanditOperator > pbandit
A shared pointer to the bandit operator (policy).
map< string, float > get_probs()
Gets the probabilities associated with each arm.