class RANN::Backprop
Constants
- ACTIVATION_DERIVATIVES
Attributes
network[RW]
Public Class Methods
bptt_connecting_to(neuron, network, timestep)
# File lib/rann/backprop.rb, line 246
def self.bptt_connecting_to neuron, network, timestep
  # halt traversal if we're at a context and we're at the base timestep
  return [] if neuron.context? && timestep == 0

  timestep -= 1 if neuron.context?

  network.connections_to(neuron).each.with_object [] do |c, a|
    # don't enqueue connections from inputs
    next if c.input_neuron.input?

    a << [c.input_neuron, timestep]
  end
end
mse(targets, outputs)
# File lib/rann/backprop.rb, line 232
def self.mse targets, outputs
  total_squared_error = 0.to_d

  targets.size.times do |i|
    total_squared_error += (targets[i] - outputs[i]) ** 2 / 2
  end

  total_squared_error
end
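
For example, for a single target/output pair the halved squared error works out as below. The values are BigDecimals, as the library uses throughout; the top-level require is assumed to load the gem.

  require "bigdecimal/util"
  require "rann"

  targets = [1.to_d]
  outputs = [0.6.to_d]

  RANN::Backprop.mse targets, outputs
  # => 0.08, i.e. (1 - 0.6)**2 / 2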
mse_delta(target, actual)
# File lib/rann/backprop.rb, line 242
def self.mse_delta target, actual
  actual - target
end
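
The corresponding derivative of the halved squared error with respect to a single output, using the same values as above:

  RANN::Backprop.mse_delta 1.to_d, 0.6.to_d
  # => -0.4 (actual - target)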
new(network, opts = {})
# File lib/rann/backprop.rb, line 23
def initialize network, opts = {}
  @network          = network
  @connections_hash = network.connections.each.with_object({}){ |c, h| h[c.id] = c }
  @optimiser        = RANN::Optimisers.const_get(opts[:optimiser] || 'RMSProp').new opts
  @batch_count      = 0.to_d
end
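
A minimal construction sketch, assuming `network` is a fully wired RANN::Network built elsewhere. The `:optimiser` option names a constant under RANN::Optimisers (defaulting to 'RMSProp'), and the whole opts hash is also forwarded to the optimiser's constructor:

  require "rann"

  # `network` is assumed to be a RANN::Network built elsewhere
  backprop = RANN::Backprop.new network, optimiser: 'RMSProp'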
reset!(network)
# File lib/rann/backprop.rb, line 227
def self.reset! network
  network.reset!
  network.neurons.select(&:context?).each{ |n| n.value = 0.to_d }
end
run_single(network, inputs, targets)
# File lib/rann/backprop.rb, line 98
def self.run_single network, inputs, targets
  states = []
  inputs = [inputs] if inputs.flatten == inputs

  # run the data into the network. (feed forward)
  # all but last
  (inputs.size - 1).times do |timestep|
    network.evaluate inputs[timestep]
    states[timestep] = network.reset!
  end

  # last
  outputs = network.evaluate inputs.last
  states[inputs.size - 1] = network.reset!

  # calculate error
  error = mse targets, outputs

  # backward pass with unravelling for recurrent networks
  node_deltas = Hash.new{ |h, k| h[k] = {} }
  initial_timestep = inputs.size - 1
  neuron_stack = network.output_neurons.map{ |n| [n, initial_timestep] }

  # initialise network end-point node_deltas to zero in all earlier timesteps
  network.neurons_with_no_outgoing_connections.each do |n|
    (0...(inputs.size - 1)).each do |i|
      node_deltas[i][n.id] = 0.to_d
      neuron_stack << [n, i]
    end
  end

  gradients = Hash.new 0.to_d

  while current = neuron_stack.shift
    neuron, timestep = current
    next if node_deltas[timestep].key? neuron.id

    # neuron delta is the summation of the connection deltas for the
    # connections from this neuron
    if neuron.output?
      output_index = network.output_neurons.index neuron
      step_one = mse_delta targets[output_index], outputs[output_index]
    else
      sum =
        network.connections_from(neuron).reduce 0.to_d do |m, c|
          out_timestep = c.output_neuron.context? ? timestep + 1 : timestep
          output_node_delta = node_deltas[out_timestep][c.output_neuron.id]

          if out_timestep > initial_timestep
            m
          elsif !output_node_delta
            break
          else
            # connection delta is the output neuron delta multiplied by the
            # connection's weight
            connection_delta =
              if c.output_neuron.is_a? ProductNeuron
                intermediate =
                  network.connections_to(c.output_neuron)
                         .reject{ |c2| c2 == c }
                         .reduce 1.to_d do |m2, c2|
                           m2 * states[timestep][:values][c2.input_neuron.id] * c2.weight
                         end

                output_node_delta * intermediate * c.weight
              else
                output_node_delta * c.weight
              end

            m + connection_delta
          end
        end

      step_one = sum || next
    end

    from_here = bptt_connecting_to neuron, network, timestep
    neuron_stack |= from_here

    node_delta =
      ACTIVATION_DERIVATIVES[neuron.activation_function]
        .call(states[timestep][:values][neuron.id]) * step_one

    node_deltas[timestep][neuron.id] = node_delta

    in_timestep = neuron.context? ? timestep - 1 : timestep
    network.connections_to(neuron).each do |c|
      # connection gradient is the output neuron delta multiplied by the
      # connection's input neuron value.
      gradient =
        if c.output_neuron.is_a? ProductNeuron
          intermediate = states[timestep][:intermediates][c.output_neuron.id]
          node_delta * intermediate / c.weight
        elsif c.input_neuron.context? && timestep == 0
          0.to_d
        else
          node_delta * states[in_timestep][:values][c.input_neuron.id]
        end

      gradients[c.id] += gradient
    end
  end

  reset! network

  [gradients, error]
end
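
A sketch of calling it directly; `network` is assumed to exist, and the input/target shapes follow the code above (a flat array is wrapped into a single timestep, a nested array is treated as one sub-array of input values per timestep, with targets compared against the outputs of the final timestep):

  # single timestep: one value per input neuron
  gradients, error = RANN::Backprop.run_single network, [0.5.to_d, 0.1.to_d], [1.to_d]

  # recurrent sequence: one sub-array of input values per timestep
  sequence = [[0.5.to_d], [0.2.to_d], [0.9.to_d]]
  gradients, error = RANN::Backprop.run_single network, sequence, [1.to_d]

  # `gradients` maps connection ids to error gradients,
  # `error` is the halved squared error of the final outputs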
Public Instance Methods
restore(filepath = nil)
# File lib/rann/backprop.rb, line 211
def restore filepath = nil
  unless filepath
    filepath = Dir['*'].select{ |f| f =~ /rann_savepoint_.*/ }.sort.last

    unless filepath
      @network.init_normalised!
      puts "No savepoints found—initialised normalised weights"
      return
    end
  end

  weights, opt_vars = YAML.load_file(filepath)

  @network.impose(weights)
  @network.optimiser.load_state(opt_vars)
end
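
A usage sketch: with no argument the newest rann_savepoint_* file in the working directory is loaded, and if none exists the network is initialised with normalised weights instead. The explicit filename below is purely illustrative:

  backprop = RANN::Backprop.new network

  # load the most recent savepoint, if any
  backprop.restore

  # or restore a specific savepoint file
  backprop.restore "rann_savepoint_2018-01-01-00-00-00.yml"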
run_batch(inputs, targets, opts = {})
# File lib/rann/backprop.rb, line 30
def run_batch inputs, targets, opts = {}
  @batch_count += 1

  batch_size = inputs.size
  avg_gradients = Hash.new{ |h, k| h[k] = 0 }
  avg_batch_error = 0

  # force longer bits of work per iteration, to maximise CPU usage and
  # minimise marshalling and process overhead etc. best for small networks.
  # for larger networks where one unit of work takes a long time, and the
  # work can vary in time taken, use num_groups == inputs.size
  num_groups = opts[:num_groups] || ([1, opts[:processes]].max * 10)
  grouped_inputs = in_groups(inputs, num_groups, false).reject &:empty?

  reduce_proc =
    lambda do |_, _, result|
      group_avg_gradients, group_avg_error = result

      avg_gradients.merge!(group_avg_gradients){ |_, o, n| o + n }
      avg_batch_error += group_avg_error
    end

  Parallel.each_with_index(
    grouped_inputs,
    in_processes: opts[:processes],
    finish: reduce_proc
  ) do |inputs, i|
    group_avg_gradients = Hash.new{ |h, k| h[k] = 0.to_d }
    group_avg_error = 0.to_d

    inputs.each_with_index do |input, j|
      gradients, error = Backprop.run_single network, input, targets[i + j]

      gradients.each do |cid, g|
        group_avg_gradients[cid] += g / batch_size
      end
      group_avg_error += error / batch_size
    end

    group_avg_gradients.default_proc = nil
    [group_avg_gradients, group_avg_error]
  end

  if opts[:checking]
    # check assumes batchsize 1 for now
    sorted_gradients = avg_gradients.values_at *network.connections.map(&:id)

    invalid = GradientChecker.check network, inputs.first, targets.first, sorted_gradients

    if invalid.empty?
      puts "gradient valid"
    else
      puts "gradients INVALID for connections:"
      invalid.each do |i|
        puts "#{network.connections[i].input_neuron.name} -> #{network.connections[i].output_neuron.name}"
      end
    end
  end

  avg_gradients.each do |con_id, gradient|
    con = @connections_hash[con_id]
    next if con.locked?

    update = @optimiser.update gradient, con.id
    con.weight += update
  end

  avg_batch_error
end
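
A sketch of an outer training loop, assuming `train_inputs` and `train_targets` are arrays of samples shaped as run_single expects; `:processes` (and optionally `:num_groups`) control how the batch is split across worker processes, and the return value is the averaged batch error:

  backprop = RANN::Backprop.new network

  50.times do |epoch|
    error = backprop.run_batch train_inputs, train_targets, processes: 4
    puts "epoch #{epoch}: avg batch error #{error.to_f}"

    backprop.save if epoch % 10 == 9
  end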
save(filepath = nil)
# File lib/rann/backprop.rb, line 200
def save filepath = nil
  filepath ||= "rann_savepoint_#{DateTime.now.strftime('%Y-%m-%d-%H-%M-%S')}.yml"

  weights  = @network.params
  opt_vars = @optimiser.state

  File.open filepath, "w" do |f|
    f.write YAML.dump [weights, opt_vars]
  end
end
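
A brief checkpointing sketch, pairing with #restore above; with no argument a timestamped rann_savepoint_*.yml file is written to the working directory, and the explicit path below is purely illustrative:

  backprop.save                        # writes rann_savepoint_<timestamp>.yml
  backprop.save "my_checkpoint.yml"    # or an explicit path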