class Downspout::Downloader

The object returned by a call to fetch_url()

Attributes

path[RW]

returns the path to the downloaded file

response[R]

returns the remote response as the appropriate Net::HTTPResponse

response_headers[R]

returns the headers parsed from the remote response

uri[R]

returns the URI parsed from the URL

url[RW]

returns the URL initially given

Public Instance Methods

basename() click to toggle source

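Returns the file name for the download: the base name of the local path when one is set, otherwise the base name of the URL path, falling back to 'file.downspout' when the URL has no usable path. The result is cached.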

# File lib/downspout/downloader.rb, line 56
# Returns the file name for this download, caching it in @basename.
#
# Resolution order:
# 1. the base name of @path, when a path has been set;
# 2. the base name of the URI path, when it is neither nil, empty nor '/';
# 3. the literal 'file.downspout'.
def basename
  return @basename unless @basename.nil?

  if @path.nil?
    uri_path = @uri.path
    if uri_path.nil? || uri_path.empty? || uri_path == '/'
      # nothing usable in the URL - fall back to the fixed default name
      $logger.debug("downspout | downloader | basename | Bad URI path")
      @basename = 'file.downspout'
    else
      @basename = File.basename( uri_path )
    end
  else
    @basename = File.basename( @path )
  end

  $logger.debug("downspout | downloader | basename | #{@basename} ")

  @basename
end
disable_curb!() click to toggle source

configure this download NOT to use the Curb library

# File lib/downspout/downloader.rb, line 94
# Configures this download not to use the Curb library by setting the
# @curb_enabled flag to false. Returns false (the assigned value).
def disable_curb!
  @curb_enabled = false
end
duration() click to toggle source

returns the time taken to download the file

# File lib/downspout/downloader.rb, line 48
# Returns the elapsed download time, computed as @finished_at - @started_at.
# Returns nil when either timestamp has not been recorded.
def duration
  return nil unless @started_at && @finished_at

  @finished_at - @started_at
end
enable_curb!() click to toggle source

configure this download to use the Curb library (has no effect if Curb is unavailable); returns the resulting setting

# File lib/downspout/downloader.rb, line 87
# Switches this download to the Curb library, but only when
# Downspout::Config reports that Curb is available; otherwise the current
# setting is left untouched.
#
# Returns the resulting value of @curb_enabled.
def enable_curb!
  if Downspout::Config.curb_available?
    @curb_enabled = true
  end

  @curb_enabled
end
scheme() click to toggle source

returns the protocol or ‘scheme’ of the URL

# File lib/downspout/downloader.rb, line 42
# Returns the scheme (protocol) of the parsed URI, or nil when no URI has
# been parsed yet.
def scheme
  @uri.nil? ? nil : @uri.scheme
end
use_curb?() click to toggle source

will this download use the Curb library?

# File lib/downspout/downloader.rb, line 76
# Reports whether this download is configured to use the Curb library
# (the current value of @curb_enabled).
def use_curb?
  @curb_enabled
end
use_net_http?() click to toggle source

will this download use the default Net/HTTP library?

# File lib/downspout/downloader.rb, line 81
# Reports whether this download will use the default Net/HTTP library,
# i.e. true exactly when use_curb? is not truthy.
def use_net_http?
  !use_curb?
end

Private Instance Methods

curb_http_download() click to toggle source
# File lib/downspout/downloader.rb, line 301
# Downloads @url to @path using Curl::Easy, following redirects up to
# Downspout::Config.max_redirects.
#
# Side effects: populates @response_headers (via parse_headers_from_string!),
# sets @redirected_url when the last Location header differs from @url, and
# builds @response as a Net::HTTPResponse matching the Curb status code.
#
# Returns true when a file exists at @path afterwards, false otherwise.
# Re-raises Curl::Err::HostResolutionError after logging it.
def curb_http_download
  $logger.debug("downspout | downloader | curb_http_download | Downloading #{@url} ...")

  begin
    curb = Curl::Easy.download( @url, @path ) do |easy|
      easy.follow_location = true
      easy.max_redirects = Downspout::Config.max_redirects
    end
  rescue Curl::Err::HostResolutionError => dns_err
    $logger.error("downspout | downloader | curb_http_download | Curb/Curl DNS Error | #{@uri.host}")
    raise dns_err
  end

  $logger.debug("downspout | downloader | curb_http_download | Response Code : #{curb.response_code}")

  # missing file, failed download - delete the response body [if downloaded]
  remove_file_at_target_path unless [200, 202].include?( curb.response_code )

  # populate the response headers from curb header string
  parse_headers_from_string!( curb.header_str )

  # a final location that differs from the requested URL means we were re-directed
  final_location = curb_last_location( curb.header_str )
  @redirected_url = final_location unless final_location == @url

  # populate a 'proxy' HTTPResponse object with the Curb data...
  response_class = Net::HTTPResponse.send('response_class', curb.response_code.to_s)
  $logger.debug("downspout | downloader | curb_http_download | Response Type : #{response_class.name}")

  http_info = @response_headers["HTTP"]
  @response = response_class.new( http_info[:version], curb.response_code, http_info[:message] )

  return true if File.exist?( @path )

  $logger.error("downspout | downloader | curb_http_download | Missing File at download path : #{@path}")
  false
end
curb_last_location( header_string ) click to toggle source
# File lib/downspout/downloader.rb, line 343
# Returns the value of the last Location header found in a raw header
# string (which may hold the header blocks of several redirect hops), or nil
# when there is none.
#
# header_string - raw response headers, lines separated by "\r\n" or "\n".
#
# The header name is matched case-insensitively and only at the start of a
# line, so headers such as Content-Location are not mistaken for it. The
# value is taken up to the end of the line, so a final header without a
# trailing line break is returned in full.
def curb_last_location( header_string )
  locations = header_string.to_s.scan(/^Location:[ \t]*(.*?)[ \t]*\r?$/i).flatten

  result = locations.last
  return nil if result.nil?

  result = result.strip
  return nil if result.empty?

  $logger.debug("downspout | downloader | curb_last_location | #{result}")
  result
end
file_name_from_content_disposition() click to toggle source

Extracts the file name from the Content-Disposition header per RFC 2183 (https://tools.ietf.org/html/rfc2183)

# File lib/downspout/downloader.rb, line 396
# Extracts the file name from the Content-Disposition response header
# (RFC 2183), e.g.
#
#   Content-Disposition: attachment; filename="iPad_User_Guide.pdf"
#
# Returns the file name, or nil when there are no response headers, no
# Content-Disposition header, or the header carries no filename parameter.
def file_name_from_content_disposition
  headers = response_headers
  return nil if headers.nil?

  cd_key = headers.keys.find { |k| k =~ /content-disposition/i } # TODO: better to use the last?

  $logger.debug("downspout | downloader | file_name_from_content_disposition | cd key : #{cd_key}")
  return nil if cd_key.nil?

  # String#[] with a capture index yields nil instead of raising when the
  # header has no filename parameter (e.g. a bare "inline").
  file_name = headers[cd_key].to_s[/filename="?([^;"]+)"?/, 1]
  file_name = file_name.strip if file_name

  $logger.debug("downspout | downloader | file_name_from_content_disposition | #{file_name}")
  file_name
end
file_name_from_content_type() click to toggle source
# File lib/downspout/downloader.rb, line 416
# Derives a file name from the Content-Type response header.
#
# The stem is @basename when set, otherwise 'default'; the extension is
# '.html' when the content type mentions html, else '.pdf' when it mentions
# pdf. Returns nil when there is no Content-Type header, its value is
# missing, or it matches neither type.
def file_name_from_content_type
  ct_key = response_headers.keys.find { |k| k =~ /content-type/i }
  return nil unless ct_key

  content_type = @response_headers[ct_key]
  return nil unless content_type

  file_name =
    if content_type =~ /html/
      "#{@basename || 'default'}.html"
    elsif content_type =~ /pdf/
      "#{@basename || 'default'}.pdf"
    end

  $logger.debug("downspout | downloader | file_name_from_content_type | #{file_name}")
  file_name
end
file_name_from_redirect() click to toggle source
# File lib/downspout/downloader.rb, line 430
# Returns the base name of the path of @redirected_url, or nil when no
# redirect was recorded or the redirect URL has no usable path (nil, empty
# or '/').
def file_name_from_redirect
  return nil if @redirected_url.nil?

  redirect_path = URI::parse( @redirected_url ).path

  if redirect_path.nil? || redirect_path.empty? || redirect_path == '/'
    $logger.debug("downspout | downloader | basename | Bad URI path")
    return nil
  end

  File.basename( redirect_path )
end
generate_file_name() click to toggle source
# File lib/downspout/downloader.rb, line 384
# Picks a file name from the response, trying in order the
# Content-Disposition header, the redirect URL and the Content-Type header.
# Returns the first non-nil candidate, or nil when none yields a name.
def generate_file_name
  from_disposition = file_name_from_content_disposition
  return from_disposition unless from_disposition.nil?

  from_redirect = file_name_from_redirect
  return from_redirect unless from_redirect.nil?

  file_name_from_content_type
end
get_ftp_credential() click to toggle source
# File lib/downspout/downloader.rb, line 165
# Looks up the credential to use for the FTP host of @uri.
#
# Configured credentials (Downspout::Config.credentials) for the host are
# preferred, with the "ftps" scheme taking priority over any other scheme
# matching /ftp/. When none is configured, a credential is built from @url
# via Downspout::Credential.create_from_url.
#
# Returns the credential, or nil when neither source provides one.
def get_ftp_credential
  host = @uri.host.downcase
  host_credentials = Downspout::Config.credentials.select { |c| c.host == host }

  # look up the credentials for this FTP host, preferring the FTPS scheme
  cred = host_credentials.find { |c| c.scheme == "ftps" }
  cred ||= host_credentials.find { |c| c.scheme =~ /ftp/ }

  if cred
    $logger.debug("downspout | downloader | get_ftp_credential | Loaded credentials for #{cred.host} ...")
    return cred
  end

  $logger.warn("downspout | downloader | get_ftp_credential | No established credentials found for '#{@uri.host}'.")

  # attempt to extract credential from the URL
  cred = Downspout::Credential.create_from_url( @url )

  unless cred
    $logger.warn("downspout | downloader | get_ftp_credential | No embedded credentials found in URL.")
    return nil
  end

  # only reported on this path: the credential really did come from the URL
  $logger.debug("downspout | downloader | get_ftp_credential | Using embedded credentials found in URL with user: #{cred.user_name}.")
  cred
end
net_ftp_download() click to toggle source
# File lib/downspout/downloader.rb, line 191
# Downloads the file at @uri to @path over FTP using Net::FTP.
#
# Logs in with the credential from get_ftp_credential, or anonymously when
# none is available, enables passive mode, changes to the directory part of
# the URI path and fetches the file in binary mode.
#
# Returns true when a file exists at @path afterwards, false otherwise.
# Any error raised during the transfer is logged and re-raised.
def net_ftp_download
  $logger.debug("downspout | downloader | net_ftp_download | Downloading #{@url} ...")

  cred = get_ftp_credential

  if cred.nil?
    # proceed anyway - slight possibility it's an un-authorized FTP account...
    $logger.warn("downspout | downloader | net_ftp_download | Proceeding without credentials, assuming unauthorized service ...")
  end

  begin
    Net::FTP.open( @uri.host ) do |ftp|
      if cred.nil?
        # Net::FTP#login defaults to the anonymous user; a server will not
        # accept further commands before some login has been sent
        ftp.login
      else
        ftp.login( cred.user_name, cred.pass_word )
      end

      # passive is an accessor: it must be assigned, merely reading it
      # leaves the connection mode unchanged
      ftp.passive = true
      ftp.chdir( File.dirname( @uri.path ) )
      ftp.getbinaryfile( File.basename(@uri.path), @path )
    end
  rescue StandardError => e
    $logger.error("downspout | downloader | net_ftp_download | Exception : #{e}")
    raise
  end

  unless File.exist?( @path )
    $logger.error("downspout | downloader | net_ftp_download | #{basename} download failed.")
    return false
  end

  true
end
net_http_download() click to toggle source
# File lib/downspout/downloader.rb, line 221
# Downloads @url to @path using Net::HTTP (via net_http_fetch).
#
# The response body is written to @path in binary mode and the response
# status line and headers are re-serialised and handed to
# parse_headers_from_string! to populate the response headers.
#
# Returns true when the final status is 200 or 202 and a file exists at
# @path; otherwise removes any partial file and returns false.
# A SocketError (e.g. DNS failure) is logged, the target file removed and
# the error re-raised.
def net_http_download
  $logger.debug("downspout | downloader | net_http_download | Downloading #{@url} ...")

  begin
    response = net_http_fetch( @url )
    # File.open rather than Kernel#open: the latter can treat a path
    # starting with '|' as a command to run
    File.open( @path, "wb" ) do |file|
      file.write(response.body)
    end
  rescue SocketError => dns_err
    $logger.error("downspout | downloader | net_http_download | Net/HTTP DNS Error | #{@uri.host} | #{dns_err.inspect}")
    remove_file_at_target_path
    raise dns_err
  end

  $logger.debug("downspout | downloader | net_http_download | Response Code : #{response.code}")

  # populate the response headers from net/http headers...
  header_lines = ["HTTP/1.1 #{response.code} #{response.message}"]
  response.each_header do |name, value|
    header_lines << "#{name}: #{value}"
  end
  parse_headers_from_string!( header_lines.join("\r\n") + "\r\n" )

  unless [200, 202].include?( response.code.to_i )
    # missing file, failed download - delete the response body [if downloaded]
    remove_file_at_target_path
    return false
  end

  unless File.exist?( @path )
    $logger.error("downspout | downloader | net_http_download | Missing File at download path : #{@path}")
    return false
  end

  true
end
net_http_fetch( url_str, redirects = 0 ) click to toggle source
# File lib/downspout/downloader.rb, line 258
# Performs an HTTP(S) GET of url_str, following redirects recursively.
#
# url_str   - the URL to fetch.
# redirects - number of redirects already followed (callers normally omit it).
#
# Basic authentication is sent only when the URL itself carries user
# information (http://user:pass@host/...); no credentials are sent otherwise.
# On a redirect, @redirected_url is set to the Location target, resolved
# against the current URL so that relative Location values work.
#
# Sets @response to the last response received and returns the final
# successful response.
#
# Raises Downspout::BadURL when url_str is nil,
# Downspout::ExcessiveRedirects when redirects exceeds
# Downspout::Config.max_redirects, URI::InvalidURIError for an unparsable
# URL, and whatever Net::HTTPResponse#error! raises for other statuses.
def net_http_fetch( url_str, redirects = 0 )
  $logger.debug("downspout | downloader | net_http_fetch | URL: #{url_str}, Redirects: #{redirects}.")

  raise Downspout::BadURL, 'URL is missing' if url_str.nil?

  if redirects > Downspout::Config.max_redirects
    raise Downspout::ExcessiveRedirects, 'HTTP redirect too deep'
  end

  begin
    u = URI.parse( url_str )
  rescue NoMethodError => e
    # convert to Invalid URI as that's the more pertinent issue
    raise URI::InvalidURIError, e.to_s
  end

  http = Net::HTTP.new( u.host, u.port )

  if u.scheme == "https"
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE unless Downspout::Config.ssl_verification?
  end

  my_request = Net::HTTP::Get.new( u.request_uri )

  # Authenticate only with credentials embedded in the URL. User info is
  # percent-encoded in a URI; '+' is protected first because the form
  # decoder would otherwise turn it into a space.
  if u.user
    decode = lambda { |part| URI.decode_www_form_component( part.to_s.gsub('+', '%2B') ) }
    my_request.basic_auth( decode.call(u.user), decode.call(u.password) )
  end

  @response = http.request( my_request )

  case @response
  when Net::HTTPSuccess
    @response
  when Net::HTTPRedirection
    location = @response['location']
    # Location may be a relative reference; resolve it against the current URL
    @redirected_url = location.nil? ? nil : URI.join( url_str, location ).to_s
    # TODO : use the new location to update the file name / extension when unknown
    net_http_fetch( @redirected_url, redirects + 1 )
  else
    $logger.error("downspout | downloader | net_http_fetch | Response : #{@response}")
    @response.error!
  end
end
parse_headers_from_string!( header_str ) click to toggle source
# File lib/downspout/downloader.rb, line 354
# Parses a raw HTTP header string into a hash and stores it in
# @response_headers (which is also the return value).
#
# header_str - header lines separated by "\r\n"; may contain the header
#              blocks of several responses one after another (as produced
#              when redirects are followed).
#
# The hash maps each header name to its value (a later occurrence of a name
# overwrites an earlier one) and has an extra "HTTP" entry describing the
# status line of the last response in the string:
#   :header  - the raw status line
#   :version - the HTTP version, e.g. "1.1"
#   :code    - the status code as an Integer
#   :message - the full reason phrase, e.g. "Not Found" (nil when absent)
# All four are nil when the string contains no status line.
def parse_headers_from_string!( header_str )
  lines = header_str.to_s.split("\r\n")

  # the last status line belongs to the final response
  status_line = lines.reverse.find { |line| line.start_with?("HTTP/") }

  http_hash = {}
  http_hash[:header] = status_line
  http_hash[:version] = nil
  http_hash[:code] = nil
  http_hash[:message] = nil

  unless status_line.nil?
    protocol, code, message = status_line.split(" ", 3)
    http_hash[:version] = protocol[/\AHTTP\/([0-9.]+)/, 1]
    http_hash[:code] = code.to_i
    # limit of 3 keeps multi-word reason phrases intact
    message = message.to_s.strip
    http_hash[:message] = message.empty? ? nil : message
  end

  header_hash = {}
  header_hash["HTTP"] = http_hash

  lines.each do |line|
    next if line.empty? || line.start_with?("HTTP/")
    begin
      matches = line.match(/([\w\-\s]+)\:\s?(.*)/)
      next if matches.nil?
      header_hash[matches[1]] = matches[2]
    rescue StandardError => e
      $logger.warn("downspout | downloader | parse_headers_from_string! | #{line}, Exception : #{e}")
    end
  end

  @response_headers = header_hash
end
remove_file_at_target_path() click to toggle source
# File lib/downspout/downloader.rb, line 158
# Deletes the file at @path when one exists; does nothing otherwise.
def remove_file_at_target_path
  return unless File.exist?( @path )

  $logger.debug("downspout | downloader | remove_file_at_target_path | Removing #{@path} ... ")
  FileUtils.rm( @path )
end