class MultiArmedBandit::EpsilonGreedy

Attributes

counts[RW]
epsilon[RW]
n_arms[RW]
probs[RW]
values[RW]

Public Class Methods

new(epsilon, n_arms) click to toggle source

Initialize an object

# File lib/multi_armed_bandit/epsilon_greedy.rb, line 9
# Builds a new bandit.
#
# Stores +epsilon+ (compared against a random draw in #select_arm) and
# +n_arms+ (the number of arms), then delegates to #reset to set up the
# per-arm state.
def initialize(epsilon, n_arms)
  @epsilon, @n_arms = epsilon, n_arms
  reset
end

Public Instance Methods

bulk_update(new_counts, new_rewards) click to toggle source

Updates all arms at once. new_counts is a list holding the number of trials for each arm, and new_rewards is a list holding the total reward for each arm.

# File lib/multi_armed_bandit/epsilon_greedy.rb, line 24
# Replaces the bandit's statistics with aggregated totals and recomputes the
# selection probability of every arm.
#
# new_counts::  list with the number of trials of each arm
# new_rewards:: list with the total reward of each arm (same order)
#
# The arm with the highest mean reward gets probability 1 - epsilon; the
# remaining epsilon is split evenly between the other arms.
#
# Returns the updated @probs array (modified in place).
def bulk_update(new_counts, new_rewards)
  # Keep a private copy: #update increments @counts in place and must not
  # silently modify the array the caller handed in.
  @counts = new_counts.dup

  # Mean reward of each arm. An arm without trials gets 0.0 (the same value
  # #reset starts with) instead of 0 / 0.0 => NaN, which would break the
  # maximum lookup below.
  @values = @counts.zip(new_rewards).map do |trials, reward|
    divisor = trials.to_f
    divisor.zero? ? 0.0 : reward / divisor
  end

  # Calculate probabilities. Float arithmetic is forced so that an Integer
  # epsilon (0 or 1) is not truncated by integer division.
  best_arm = ind_max(@values)
  greedy_share = 1.0 - @epsilon
  other_share = @n_arms > 1 ? @epsilon.to_f / (@n_arms - 1) : 0.0

  @n_arms.times do |arm|
    @probs[arm] = arm == best_arm ? greedy_share : other_share
  end

  @probs
end
reset() click to toggle source

Resets the per-arm counts, values and probabilities to zero.

# File lib/multi_armed_bandit/epsilon_greedy.rb, line 16
# Puts the per-arm state back to its starting point: every count becomes 0
# and every value and probability becomes 0.0, with one slot per arm.
def reset
  @counts = Array.new(@n_arms) { 0 }
  @values = Array.new(@n_arms) { 0.0 }
  @probs  = Array.new(@n_arms) { 0.0 }
end
select_arm() click to toggle source
# File lib/multi_armed_bandit/epsilon_greedy.rb, line 60
# Picks the index of the arm to play next.
#
# Draws a random number; when it exceeds epsilon the arm with the highest
# current value is returned, otherwise an arm index is drawn uniformly at
# random from all arms.
def select_arm
  exploit = rand > @epsilon
  return ind_max(@values) if exploit

  rand(@values.size)
end
update(chosen_arm, reward) click to toggle source
# File lib/multi_armed_bandit/epsilon_greedy.rb, line 49
# Records one observed reward for the arm at index +chosen_arm+.
#
# The arm's trial count is incremented and its value is replaced by the
# running average of all rewards seen so far for that arm. Returns nil.
def update(chosen_arm, reward)
  n = (@counts[chosen_arm] += 1)

  # Incremental mean: weight the previous average by (n - 1) / n and the new
  # reward by 1 / n.
  previous = @values[chosen_arm]
  @values[chosen_arm] = ((n - 1) / n.to_f) * previous + (1 / n.to_f) * reward
  nil
end

Private Instance Methods

categorical_draw(probs) click to toggle source
# File lib/multi_armed_bandit/epsilon_greedy.rb, line 74
# Samples an index from the list +probs+.
#
# A single random number is drawn and the entries of +probs+ are summed from
# the front; the first index whose running total exceeds the draw is
# returned. If no running total exceeds it, the last index is returned.
def categorical_draw(probs)
  threshold = rand
  running_total = 0.0

  probs.each_with_index do |weight, index|
    running_total += weight
    return index if running_total > threshold
  end

  probs.size - 1
end
ind_max(x) click to toggle source
# File lib/multi_armed_bandit/epsilon_greedy.rb, line 69
# Returns the index of the first occurrence of the largest element of +x+
# (nil when +x+ is empty).
def ind_max(x)
  x.find_index(x.max)
end