module Torchrb::NN::ImageDefault

Public Instance Methods

define_nn(options)
# File lib/torchrb/nn/image_default.rb, line 3
  # Defines the default image network inside the Torch/Lua runtime.
  #
  # Layout: two (convolution -> 2x2 max-pooling) stages, a flattening View,
  # three Linear layers and a final LogSoftMax.
  #
  # options - object indexed with options[:image_size]; that value must respond
  #           to #max. The largest entry is used as BOTH width and height, so
  #           the input is treated as square.
  #           NOTE(review): non-square images would need separate width/height
  #           arithmetic - confirm callers only pass square sizes.
  #
  # Returns torch.eval(...).to_h. The evaluated Lua chunk returns one
  # {tostring(module), input size, output size} entry per net:listModules() item.
  #
  # Raises ArgumentError when the image is too small to survive both
  # convolution/pooling stages. Previously a negative edge length was squared
  # into a positive, meaningless nn.View size.
  def define_nn options
    # Output-size formulas used below (Torch7 nn; stride is dW/dH, padding 0):
    #   SpatialConvolution(nIn, nOut, kW, kH)  -- stride defaults to 1
    #     out = floor((in - kernel) / stride) + 1      e.g. (256 - 5) / 1 + 1 = 252
    #   SpatialMaxPooling(2, 2, 2, 2)          -- padding defaults to 0
    #     out = floor((in - 2) / 2) + 1 = in / 2 (integer division)

    image_width_height = options[:image_size].max
    kernel_width = 5
    input_layer = 120*2
    interm_layer = 84*2
    output_layer = 2

    # Edge length of one feature map after the two conv+pool stages.
    view_size = image_width_height
    2.times do
      view_size = (view_size - kernel_width + 1) / 2 # stride-1 convolution, then 2x2 pooling
      if view_size < 1
        raise ArgumentError,
              "image size #{image_width_height} is too small for two #{kernel_width}x#{kernel_width} convolution + 2x2 pooling stages"
      end
    end
    flat_size = 16 * view_size * view_size

    torch.eval(<<-EOF, __FILE__, __LINE__).to_h
        -- Shape notes show a 256x256 input first and a 32x32 input second.
        -- `net` is left global on purpose. NOTE(review): presumably other Lua chunks reference it - confirm.
        net = nn.Sequential()                                                                -- input                                [ 4,256,256]   4,32,32
        net:add(nn.SpatialConvolution(4, 6, #{kernel_width}, #{kernel_width}))               -- 4 input planes -> 6 output planes -> [ 6,252,252]   6,28,28
        net:add(nn.SpatialMaxPooling(2,2,2,2))                                               -- max over 2x2 windows, stride 2    -> [ 6,126,126]   6,14,14
        net:add(nn.SpatialConvolution(6, 16, #{kernel_width}, #{kernel_width}))              -- 6 planes -> 16 planes             -> [16,122,122]  16,10,10
        net:add(nn.SpatialMaxPooling(2,2,2,2))                                               --                                   -> [16, 61, 61]  16, 5, 5
        net:add(nn.View(#{flat_size}))                                                       -- flatten the 16 feature maps       -> [59536]       400
        net:add(nn.Linear(#{flat_size}, #{input_layer}))                                     -- fully connected; width chosen arbitrarily
        net:add(nn.Linear(#{input_layer}, #{interm_layer}))                                  -- fully connected; width chosen arbitrarily
        net:add(nn.Linear(#{interm_layer}, #{output_layer}))                                 -- one output per label
        net:add(nn.LogSoftMax())                                                             -- log-probability per label

        local d = {}
        for _, mod in ipairs(net:listModules()) do
          -- Locals, so nothing leaks into the Lua global table; "[]" is kept when taking the size raises.
          local in_size, out_size = "[]", "[]"
          pcall(function () in_size = #mod.input end)
          pcall(function () out_size = #mod.output end)
          table.insert(d, {tostring(mod), in_size, out_size})
        end
        return d
    EOF
  end