class MultiArmedBandit::EpsilonGreedy
Attributes
counts[RW]
epsilon[RW]
n_arms[RW]
probs[RW]
values[RW]
Public Class Methods
new(epsilon, n_arms)
click to toggle source
Initialize an object
# File lib/multi_armed_bandit/epsilon_greedy.rb, line 9
# Store the configuration and start from freshly reset per-arm state.
#
# @param epsilon [Numeric] threshold compared against a uniform random draw
#   when selecting an arm
# @param n_arms [Integer] number of arms; used as the length of the per-arm arrays
def initialize(epsilon, n_arms)
  @epsilon, @n_arms = epsilon, n_arms
  reset
end
Public Instance Methods
bulk_update(new_counts, new_rewards)
click to toggle source
Update all arms in bulk. new_counts is a list of the number of trials for each arm, and new_rewards is a list of the total reward accumulated by each arm.
# File lib/multi_armed_bandit/epsilon_greedy.rb, line 24
# Replace the statistics of every arm in one step and recompute the
# selection probabilities.
#
# @param new_counts [Array<Integer>] number of trials for each arm
# @param new_rewards [Array<Numeric>] cumulative reward for each arm
# @return [Array<Numeric>] per-arm probabilities: 1 - epsilon for the arm
#   with the highest estimated value and epsilon shared evenly by the
#   other arms (a lone arm always gets 1.0)
def bulk_update(new_counts, new_rewards)
  # Copy the counts so later in-place increments do not leak into the
  # caller's array.
  @counts = new_counts.dup

  # Estimated value of each arm = cumulative reward / trials. An arm that
  # has never been tried has no observations; use 0.0 rather than dividing
  # by zero, which would yield NaN/Infinity and corrupt the max lookup.
  @values = @counts.zip(new_rewards).map do |trials, reward|
    trials.zero? ? 0.0 : reward / trials.to_f
  end

  # Calculate probabilities.
  best_arm = ind_max(@values)
  @probs =
    if @n_arms == 1
      # With one arm there is nothing to explore: it is always chosen.
      [1.0]
    else
      # Float division so an Integer epsilon is not truncated.
      explore_share = @epsilon / (@n_arms - 1).to_f
      Array.new(@n_arms) { |arm| arm == best_arm ? 1 - @epsilon : explore_share }
    end
end
reset()
click to toggle source
Reset instance variables
# File lib/multi_armed_bandit/epsilon_greedy.rb, line 16
# Reinitialize the per-arm arrays, each of length @n_arms: the value
# estimates and probabilities are filled with 0.0 and the trial counts with 0.
def reset
  @values = Array.new(@n_arms) { 0.0 }
  @counts = Array.new(@n_arms) { 0 }
  @probs = Array.new(@n_arms) { 0.0 }
end
select_arm()
click to toggle source
# File lib/multi_armed_bandit/epsilon_greedy.rb, line 60
# Pick the index of the arm to play next.
#
# Draws a uniform random number; when it exceeds @epsilon the arm with the
# highest estimated value is returned, otherwise a uniformly random index
# into @values is returned.
#
# @return [Integer] index of the selected arm
def select_arm
  return ind_max(@values) if rand > @epsilon

  rand(@values.size)
end
update(chosen_arm, reward)
click to toggle source
# File lib/multi_armed_bandit/epsilon_greedy.rb, line 49
# Record one observed reward for an arm.
#
# Increments the arm's trial count and folds the reward into the arm's
# running average: new = ((n - 1) / n) * old + (1 / n) * reward.
#
# @param chosen_arm [Integer] index of the arm that was played
# @param reward [Numeric] reward observed for that play
# @return [nil]
def update(chosen_arm, reward)
  @counts[chosen_arm] += 1
  pulls = @counts[chosen_arm]
  previous = @values[chosen_arm]
  @values[chosen_arm] = ((pulls - 1) / pulls.to_f) * previous + (1 / pulls.to_f) * reward
  nil
end
Private Instance Methods
categorical_draw(probs)
click to toggle source
# File lib/multi_armed_bandit/epsilon_greedy.rb, line 74
# Sample an index according to the given weights.
#
# Draws a uniform random number and walks the cumulative sum of probs,
# returning the first index at which the running total exceeds the draw.
# If the total never exceeds the draw, the last index (probs.size - 1) is
# returned.
#
# @param probs [Array<Numeric>] per-index weights
# @return [Integer] the sampled index
def categorical_draw(probs)
  threshold = rand
  running_total = 0.0
  hit = probs.size.times.find do |idx|
    running_total += probs[idx]
    running_total > threshold
  end
  hit || probs.size - 1
end
ind_max(x)
click to toggle source
# File lib/multi_armed_bandit/epsilon_greedy.rb, line 69
# Locate the largest element of x.
#
# @param x [Array] values to search
# @return [Integer, nil] index of the first occurrence of the maximum
#   element, or nil when x is empty
def ind_max(x)
  x.index(x.max)
end