class Paperweight::Download
Most of this is adapted from gist.github.com/janko-m/7cd94b8b4dd113c2c193
Constants
- DOWNLOAD_ERRORS
- Error
Public Class Methods
download(url)
click to toggle source
# File lib/paperweight/download.rb, line 35
# Class-level convenience wrapper: builds a fresh instance and forwards the
# URL to the instance-level #download, returning whatever that returns.
def self.download(url)
  downloader = new
  downloader.download(url)
end
Public Instance Methods
download(url)
click to toggle source
# File lib/paperweight/download.rb, line 18
# Fetches the resource at +url+ and returns it after passing it through
# normalize_download.
#
# Raises Error when the fetch fails with one of DOWNLOAD_ERRORS; a
# RuntimeError that is not about redirection is re-raised untouched.
def download(url)
  # Never go through the plain #open that open-uri overrides: when handed a
  # string starting with a pipe (e.g. "| ls") it would execute the rest as a
  # shell command. Calling #open on the URI object avoids that attack.
  remote = uri_from(url)
  normalize_download(remote.open(open_options))
rescue *DOWNLOAD_ERRORS => error
  reason = error.message

  # open-uri signals a redirection loop with a RuntimeError. Any other
  # RuntimeError is not a download problem, so it propagates unchanged.
  redirect_problem = reason =~ /redirection/
  raise if error.is_a?(RuntimeError) && !redirect_problem

  # Everything else is reported through our unified Error class.
  raise Error, "download failed (#{url}): #{reason}"
end
Private Instance Methods
normalize_download(file)
click to toggle source
open-uri will return a StringIO instead of a Tempfile if the file size is less than 10 KB, so we patch this behaviour by converting the StringIO into a Tempfile.
# File lib/paperweight/download.rb, line 44
# Makes sure a finished download is always backed by a Tempfile.
#
# Anything that is not a StringIO is returned as-is. A StringIO is copied
# into a new binary-mode Tempfile, and the open-uri metadata it carries
# (status, base_uri, response headers such as the content type) is
# transferred onto that Tempfile.
#
# file - the object produced by opening the URI (a StringIO or some other
#        IO-like object).
#
# Returns +file+ itself, or a Tempfile holding the StringIO's content.
def normalize_download(file)
  return file unless file.is_a?(StringIO)

  # We need to open it in binary mode for Windows users.
  Tempfile.new('download-', binmode: true).tap do |tempfile|
    # IO.copy_stream is the most efficient way of data transfer.
    IO.copy_stream(file, tempfile.path)

    # OpenURI::Meta.init only copies status, base_uri and the headers when it
    # is given the source object as its second argument; without it the
    # Tempfile gets empty metadata (e.g. a default #content_type). A StringIO
    # that was never extended with OpenURI::Meta has nothing to copy, so in
    # that case we only install the (empty) metadata accessors.
    meta_source = file.is_a?(OpenURI::Meta) ? file : nil
    OpenURI::Meta.init(tempfile, meta_source)
  end
end
open_options()
click to toggle source
# File lib/paperweight/download.rb, line 58
# Builds the options hash handed to open-uri's #open: a User-Agent header
# plus a content-length callback that rejects files larger than the
# configured maximum size.
def open_options
  limit = Paperweight.config.max_size

  # It's good to shield ourselves from files that are too big. open-uri calls
  # this as soon as it receives the "Content-Length" header, so we can bail
  # out before the body is downloaded.
  size_guard = lambda do |size|
    raise Error, "file is too big (max is #{limit})" if size && size > limit
  end

  # It was shown that in a random sample approximately 20% of websites will
  # simply refuse a request which doesn't have a valid User-Agent.
  { 'User-Agent' => 'Paperweight', content_length_proc: size_guard }
end
uri_from(url)
click to toggle source
Disabling :reek:ManualDispatch here because we don't control the URI API
# File lib/paperweight/download.rb, line 78
# Converts +url+ into a URI object that can be downloaded from.
#
# Raises Error when the resulting URI does not respond to #open.
def uri_from(url)
  # URI() raises an InvalidURIError if the URL is very wrong, but it still
  # accepts strings like "foo".
  parsed = URI(url)

  # open-uri adds #open only to http://, https:// and ftp:// URIs, which are
  # the only ones we can download from, so responding to #open is the check.
  return parsed if parsed.respond_to?(:open)

  raise Error, 'url was invalid'
end