Brush C++ API
A flexible interpretable machine learning framework
Loading...
Searching...
No Matches
thompson.cpp
Go to the documentation of this file.
1#include "thompson.h"
2
3namespace Brush {
4namespace MAB {
5
6template <typename T>
8 : BanditOperator<T>(arms)
9 , dynamic_update(dynamic)
10{
11 for (const auto& arm : arms) {
12 alphas[arm] = 2;
13 betas[arm] = 2;
14 }
15}
16
17template <typename T>
18ThompsonSamplingBandit<T>::ThompsonSamplingBandit(map<T, float> arms_probs, bool dynamic)
19 : BanditOperator<T>(arms_probs)
20 , dynamic_update(dynamic)
21{
22 for (const auto& pair : arms_probs) {
23 alphas[pair.first] = 2;
24 betas[pair.first] = 2;
25 }
26};
27
28
29template <typename T>
31 // gets sampling probabilities using the bandit
32
33 // from https://stackoverflow.com/questions/4181403/generate-random-number-based-on-beta-distribution-using-boost
34 // You'll first want to draw a random number uniformly from the
35 // range (0,1). Given any distribution, you can then plug that number
36 // into the distribution's "quantile function," and the result is as
37 // if a random value was drawn from the distribution.
38
39 // from https://stackoverflow.com/questions/10358064/random-numbers-from-beta-distribution-c
40 // The beta distribution is related to the gamma distribution. Let X be a
41 // random number drawn from Gamma(α,1) and Y from Gamma(β,1), where the
42 // first argument to the gamma distribution is the shape parameter.
43 // Then Z=X/(X+Y) has distribution Beta(α,β).
44
45 if (update) {
46 // 1. use a beta distribution based on alphas and betas to sample probabilities
47 // 2. normalize probabilities so the sum is 1?
48
49 float alpha, beta, X, Y, prob;
50 for (const auto& pair : this->probabilities) {
51 T arm = pair.first;
52
53 alpha = alphas[arm];
54 beta = betas[arm];
55
56 // TODO: stop using boost and use std::gamma_distribution (first, search to see if it is faster)
57 boost::math::gamma_distribution<> gammaX(alpha);
58 boost::math::gamma_distribution<> gammaY(beta);
59
60 X = boost::math::quantile(gammaX, Brush::Util::r.rnd_flt());
61 Y = boost::math::quantile(gammaY, Brush::Util::r.rnd_flt());
62
63 prob = X/(X+Y+0.001f);
64
65 // avoiding deadlocks when sampling from search space
66 this->probabilities[arm] = std::max(prob, 0.01f);
67 }
68
69 // assert that the sum is not zero
70 float totalProb = 0.0f;
71 for (const auto& pair : this->probabilities) {
72 totalProb += pair.second;
73 }
74 assert(totalProb != 0.0f && "Sum of probabilities is zero!");
75 }
76
77 return this->probabilities;
78}
79
80template <typename T>
81T ThompsonSamplingBandit<T>::choose(const VectorXf& context) {
82 std::map<T, float> probs = this->sample_probs(true);
83
84 return r.random_choice(probs);
85}
86
87template <typename T>
88void ThompsonSamplingBandit<T>::update(T arm, float reward, VectorXf& context) {
89 // reward must be either 0 or 1
90
91 alphas[arm] += reward;
92 betas[arm] += 1.0f-reward;
93
94 if (dynamic_update && alphas[arm] + betas[arm] >= C)
95 {
96 alphas[arm] *= C/(C+1.0f) ;
97 betas[arm] *= C/(C+1.0f) ;
98 }
99}
100
101} // MAB
102} // Brush
BanditOperator(vector< T > arms)
Constructs a BanditOperator object with a vector of arms.
std::map< T, float > probabilities
std::map< T, float > alphas
Definition thompson.h:34
ThompsonSamplingBandit(vector< T > arms, bool dynamic=false)
Definition thompson.cpp:7
std::map< T, float > sample_probs(bool update)
Samples the probabilities of the arms.
Definition thompson.cpp:30
std::map< T, float > betas
Definition thompson.h:35
void update(T arm, float reward, VectorXf &context)
Updates the reward for a specific arm.
Definition thompson.cpp:88
T choose(const VectorXf &context)
Chooses an arm based on the given tree and fitness. Should call sample_probs internally.
Definition thompson.cpp:81
static Rnd & r
Definition rnd.h:174
< nsga2 selection operator for getting the front
Definition bandit.cpp:4