class Figshare::Upload

Upload files to Figshare. NB: This can sometimes fail, so you need to check the md5 to ensure the file got there. It can take a short while for the md5 to be calculated, so upload, wait, then check for a computed_md5. The status will show as “ic_checking”, then “moving_to_final”, then “available”. I have seen it stuck at “moving_to_final”, but with the right computed_md5.

Constants

CHUNK_SIZE

Attributes

article_id[RW]
bad_count[RW]
file_id[RW]
file_info[RW]
file_name[RW]
new_count[RW]
upload_host[RW]
upload_parts_detail[RW]
upload_query[RW]

Public Class Methods

get_file_check_data(filename) click to toggle source

Calculate a local file's MD5 and size.

@param filename [String] Path/name of local file to MD5 @return [String,Integer] MD5 as a Hex String, Size of the file in bytes.

# File lib/upload.rb, line 20
# Compute the MD5 digest and the byte size of a local file.
#
# The file is opened in binary mode and fed to the digest in CHUNK_SIZE
# pieces, so the whole file is never held in memory at once.
#
# @param filename [String] Path/name of the local file
# @return [Array(String, Integer)] MD5 as a hex string, size of the file in bytes
def self.get_file_check_data(filename)
  byte_count = File.stat(filename).size
  digest = Digest::MD5.new
  File.open(filename, 'rb') do |io|
    # IO#read(length) yields nil once end-of-file is reached.
    until (chunk = io.read(CHUNK_SIZE)).nil?
      digest << chunk
    end
  end
  [digest.hexdigest, byte_count]
end

Public Instance Methods

status() click to toggle source

Get status of the current upload. Just fetches the file record from figshare. Of interest is the status field, and the computed_md5 field

@return [Hash] Figshare file record (the record is also stored in @file_info) @raise [RuntimeError] if the file record could not be fetched

# File lib/upload.rb, line 132
# Fetch the current Figshare file record for this upload.
#
# Of interest in the record are the status field and the computed_md5 field.
# The record is stored in @file_info and is also returned, so callers can
# use either.
#
# @return [Hash] Figshare file record
# @raise [RuntimeError] if file_detail did not yield a record
def status
  @file_info = nil
  file_detail(article_id: @article_id, file_id: @file_id) do |record|
    @file_info = record
  end
  raise "Upload::status(): Failed to get figshare file record" if @file_info.nil?

  # Return the record explicitly; a trailing `raise ... if` would yield nil.
  @file_info
end
upload(article_id:, file_name:, trace: 0) click to toggle source

Upload the file, to the Figshare article

@param article_id [Integer] Figshare article id @param file_name [String] path/file_name to upload @param trace [Integer] 0: no output, 1: per file upload message, 2: fuller trace

# File lib/upload.rb, line 36
# Upload a single local file into a Figshare article.
#
# Runs the upload steps in order: create the file record, fetch its upload
# URL, fetch the part layout, send each part, then mark the upload complete.
#
# @param article_id [Integer] Figshare article id
# @param file_name [String] path/file_name to upload
# @param trace [Integer] 0: no output, 1: per file upload message, 2: fuller trace
def upload(article_id:, file_name:, trace: 0)
  @article_id = article_id
  @file_name = file_name
  @trace = trace

  # Clear any state left over from a previous upload.
  @file_id = @file_info = @upload_query = @upload_host = @upload_parts_detail = nil

  initiate_new_upload
  puts "New File_id: #{@file_id}\n\n" if @trace > 1

  get_file_info
  puts "@file_info: #{@file_info.to_j}\n\n" if @trace > 1

  get_upload_parts_details
  puts "@upload_parts_detail: #{@upload_parts_detail.to_j}\n\n" if @trace > 1

  upload_the_parts
  complete_upload

  # Only the fuller trace level re-fetches and prints the final file record.
  return unless @trace > 1

  status
  puts "Final Status: #{@file_info.to_j}\n\n"
end
upload_dir(article_id:, directory:, delete_extras: false, exclude_dot_files: true, trace: 0) click to toggle source

Upload all files in a directory into one article. Checksums are compared, and only changed or new files are uploaded. Does not recurse through sub-directories, as Figshare has a flat file structure.

@param article_id [Integer] Figshare article id @param directory [String] path @param delete_extras [Boolean] delete any files at the Figshare end that aren't in the local directory. @param exclude_dot_files [Boolean] skip local files whose names start with a '.' @param trace [Integer] 0: no output, 1: per file upload message, 2: fuller trace

# File lib/upload.rb, line 73
# Upload all files in a directory into one article.
#
# The MD5 of each local file is compared with the computed_md5 cached from
# the article; only new or changed files are uploaded. A changed file is
# deleted from the article first, then uploaded again. Sub-directories are
# not walked.
#
# Sets @new_count to the number of uploads performed and @bad_count to the
# number of mismatched files that were replaced.
#
# @param article_id [Integer] Figshare article id
# @param directory [String] path
# @param delete_extras [Boolean] delete any files at the Figshare end that aren't in the local directory
# @param exclude_dot_files [Boolean] skip local files whose names start with a '.'
# @param trace [Integer] 0: no output, 1: per file upload message, 2: fuller trace
def upload_dir(article_id:, directory:, delete_extras: false, exclude_dot_files: true, trace: 0)
  @new_count = 0
  @bad_count = 0

  seen = {} # local file names encountered during the walk
  cache_article_file_md5(article_id: article_id)

  DirR.walk_dir(directory: directory, walk_sub_directories: false) do |d, f|
    next if exclude_dot_files && f.start_with?('.')

    seen[f] = true
    path = "#{d}/#{f}"
    cached = @md5_cache[f]

    unless cached
      # Not in the article yet.
      puts "ADDING: #{article_id} << #{path}" if trace > 0
      upload(article_id: article_id, file_name: path, trace: trace)
      @new_count += 1
      next
    end

    md5, _size = Upload.get_file_check_data(path)
    if cached[:md5] != md5
      # File is there, but has changed, or previously failed to upload.
      puts "Deleting: #{article_id} << #{path} #{cached[:id]} MISMATCH '#{cached[:md5]}' != '#{md5}'" if trace > 0
      file_delete(article_id: article_id, file_id: cached[:id])
      @bad_count += 1
      puts "Re-ADDING: #{article_id} << #{path}" if trace > 0
      upload(article_id: article_id, file_name: path, trace: trace)
      @new_count += 1
    elsif trace > 1
      puts "EXISTS: #{article_id} #{path}"
    end
  end

  # Handle files that exist in the Figshare article but not in the directory.
  @md5_cache.each do |fn, v|
    next if seen[fn]

    if delete_extras
      puts "Deleteing EXTRA: #{article_id} << #{fn} #{v[:id]}" if trace > 0
      # Use this entry's own id; the walk's block variables are out of scope here.
      file_delete(article_id: article_id, file_id: v[:id])
    elsif trace > 0
      puts "EXTRA: #{article_id} << #{fn} #{v[:id]}"
    end
  end
end

Private Instance Methods

cache_article_file_md5(article_id:) click to toggle source

Retrieve md5 sums of the existing files in the Figshare article. Sets @md5_cache[file name] => { :article_id, :id, :md5 }, where :md5 is Figshare's computed_md5.

@param article_id [Integer] Figshare article ID

# File lib/upload.rb, line 120
        # Build @md5_cache, a lookup of the files already in the article.
        #
        # Each entry is keyed by the file's 'name' and holds the article id,
        # the file's 'id' and its 'computed_md5' as yielded by files().
        #
        # @param article_id [Integer] Figshare article ID
        def cache_article_file_md5(article_id:)
          @md5_cache = {}
          files(article_id: article_id) do |entry|
            @md5_cache[entry['name']] = {
              article_id: article_id,
              id: entry['id'],
              md5: entry['computed_md5']
            }
          end
        end
complete_upload() click to toggle source

Completes the upload. Figshare then calculates the md5 in the background, which may take a while to complete. Sometimes the checksum never gets calculated, and is left blank.

# File lib/upload.rb, line 169
        # Tell Figshare that all parts have been sent, by POSTing to the
        # file's endpoint within the article.
        def complete_upload
          endpoint = "account/articles/#{@article_id}/files/#{@file_id}"
          post(api_query: endpoint)
          return unless @trace > 1

          puts "complete_upload"
        end
get_file_info() click to toggle source

Gets the Figshare file info. We need the upload URL to continue.

# File lib/upload.rb, line 158
        # Refresh @file_info via status, then split its 'upload_url' into the
        # host (@upload_host) and the path starting at "upload" (@upload_query).
        def get_file_info
          status
          upload_url = @file_info['upload_url']
          @upload_host = upload_url.gsub(/^http.*\/\/(.*)\/upload.*$/, '\1')
          @upload_query = upload_url.gsub(/^http.*\/\/(.*)\/(upload.*)$/, '\2')
          return unless @trace > 1

          puts "Upload_host: #{@upload_host} URL: #{@upload_query}"
        end
get_upload_parts_details() click to toggle source

Get the upload settings

# File lib/upload.rb, line 176
        # Fetch the upload description from the upload host and store the
        # parsed JSON in @upload_parts_detail. Its 'parts' entry is what
        # upload_the_parts iterates over.
        #
        # Raises a RuntimeError if the page fetch returned nil.
        def get_upload_parts_details
          @upload_parts_detail = nil
          response = nil
          # NOTE(review): verify_cert: false presumably disables certificate
          # verification while the auth token is being sent; confirm this is intended.
          WIKK::WebBrowser.https_session(host: @upload_host, verify_cert: false) do |session|
            response = session.get_page(query: @upload_query, authorization: "token #{@auth_token}")
          end
          if response.nil?
            raise "get_upload_parts_detail(#{@article_id}) failed to get upload URL"
          end

          @upload_parts_detail = JSON.parse(response)
          return unless @trace > 1

          puts "Part URL #{@upload_parts_detail['parts']}"
        end
initiate_new_upload() click to toggle source

Creates a new Figshare file record in the Figshare article (file status == 'created'). The file_id is taken from the trailing number of the 'location' URL that Figshare returns.

# File lib/upload.rb, line 143
        # Create the file record in the article by POSTing the file's base
        # name, MD5 and size. @file_id is set from the trailing digits of the
        # 'location' value in the yielded response.
        #
        # Raises a RuntimeError if no file id was obtained.
        def initiate_new_upload
          checksum, byte_count = Upload.get_file_check_data(@file_name)
          payload = {
            'name' => File.basename(@file_name),
            'md5' => checksum,
            'size' => byte_count
          }
          post(api_query: "account/articles/#{@article_id}/files", args: payload) do |reply|
            @file_id = reply['location'].gsub(/^.*\/([0-9]+)$/, '\1')
          end
          if @file_id.nil?
            raise "Upload::initiate_new_upload(): failed to create Figshare file record"
          end
        end
upload_part(buffer:, part:) click to toggle source

Upload just one part

# File lib/upload.rb, line 204
        # Send one part of the file with a PUT to "<upload query>/<part>" on
        # the upload host.
        #
        # @param buffer [String] the bytes of this part
        # @param part [Integer] the part number, appended to the upload query
        def upload_part(buffer:, part:)
          puts "upload_part(#{part})" if @trace > 1
          WIKK::WebBrowser.https_session(host: @upload_host, verify_cert: false) do |session|
            part_query = "#{@upload_query}/#{part}"
            auth_header = "token #{@auth_token}"
            session.put_req(query: part_query, authorization: auth_header, data: buffer)
          end
        end
upload_the_parts() click to toggle source

Upload the file in parts

# File lib/upload.rb, line 192
        # Upload the file part by part, as laid out in
        # @upload_parts_detail['parts'].
        #
        # Each part gives an inclusive byte range ('startOffset'..'endOffset')
        # and a 'partNo'. The file is positioned at the part's start before
        # reading, so the right bytes are sent even if the parts are not listed
        # in file order or do not begin at offset 0.
        def upload_the_parts
          parts = @upload_parts_detail['parts']
          File.open(@file_name, 'rb') do |fin|
            parts.each do |part|
              first_byte = part['startOffset']
              fin.seek(first_byte)
              # The range is inclusive, hence the + 1.
              data = fin.read(part['endOffset'] - first_byte + 1)
              upload_part(buffer: data, part: part['partNo'])
            end
          end
        end