class Downspout::Downloader
The object returned by a call to fetch_url()
Attributes
returns the path to the downloaded file
returns the remote response as the appropriate Net::HTTPResponse
returns the headers parsed from the remote response
returns the URI parsed from the URL
returns the URL initially given
Public Instance Methods
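Returns the file name for the download: the base name of the local download path when one is set, otherwise the base name of the URL path, falling back to the default name 'file.downspout' when the URL has no usable path.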
# File lib/downspout/downloader.rb, line 56
# Returns the file name for this download, memoized in @basename.
#
# Resolution order:
# 1. the base name of the local download path (@path), when set;
# 2. the base name of the URL path (@uri.path);
# 3. the placeholder 'file.downspout' when the URL path is nil, empty or '/'.
def basename
  return @basename unless @basename.nil?

  @basename =
    if !@path.nil?
      File.basename(@path)
    elsif @uri.path.nil? || @uri.path.empty? || @uri.path == '/'
      $logger.debug("downspout | downloader | basename | Bad URI path")
      'file.downspout'
    else
      File.basename(@uri.path)
    end

  $logger.debug("downspout | downloader | basename | #{@basename} ")
  @basename
end
configure this download NOT to use the Curb library
# File lib/downspout/downloader.rb, line 94
# Clears the @curb_enabled flag so this download does not use the Curb
# library. Returns the new flag value (false).
def disable_curb!
  @curb_enabled = false
end
returns the time taken to download the file
# File lib/downspout/downloader.rb, line 48
# Returns the difference between @finished_at and @started_at, or nil
# when either timestamp has not been recorded.
def duration
  return nil unless @started_at && @finished_at

  @finished_at - @started_at
end
Configures this download to use the Curb library. Has no effect when Curb is unavailable; returns the resulting curb-enabled flag.
# File lib/downspout/downloader.rb, line 87
# Turns on the @curb_enabled flag, but only when
# Downspout::Config.curb_available? reports true; otherwise the flag is
# left untouched. Returns the current value of the flag either way.
def enable_curb!
  if Downspout::Config.curb_available?
    @curb_enabled = true
  end

  @curb_enabled
end
returns the protocol or ‘scheme’ of the URL
# File lib/downspout/downloader.rb, line 42
# Returns the scheme reported by the parsed URI (@uri), or nil when no
# URI has been set.
def scheme
  @uri.nil? ? nil : @uri.scheme
end
will this download use the Curb library?
# File lib/downspout/downloader.rb, line 76
# Reports the current value of the @curb_enabled flag.
def use_curb?
  @curb_enabled
end
will this download use the default Net/HTTP library?
# File lib/downspout/downloader.rb, line 81
# Net/HTTP is the fallback transport: returns true exactly when
# use_curb? is falsy, and false otherwise.
def use_net_http?
  use_curb? ? false : true
end
Private Instance Methods
# File lib/downspout/downloader.rb, line 301
# Downloads @url to @path through Curl::Easy.download, following
# redirects up to Downspout::Config.max_redirects.
#
# Side effects visible in this method:
# * removes the target file when the response code is neither 200 nor 202;
# * fills @response_headers via parse_headers_from_string!;
# * stores the last Location header value in @redirected_url when it
#   differs from @url;
# * builds a Net::HTTPResponse stand-in in @response from the Curb data.
#
# Returns true when a file exists at @path afterwards, false otherwise.
# Re-raises Curl::Err::HostResolutionError after logging it.
def curb_http_download
  $logger.debug("downspout | downloader | curb_http_download | Downloading #{@url} ...")

  begin
    curb = Curl::Easy.download(@url, @path) do |c|
      c.follow_location = true
      c.max_redirects = Downspout::Config.max_redirects
    end
  rescue Curl::Err::HostResolutionError => dns_err
    $logger.error("downspout | downloader | curb_http_download | Curb/Curl DNS Error | #{@uri.host}")
    raise dns_err
  end

  $logger.debug("downspout | downloader | curb_http_download | Response Code : #{curb.response_code}")

  # missing file, failed download - delete the response body [if downloaded]
  remove_file_at_target_path unless [200, 202].include?(curb.response_code)

  # populate the response headers from curb header string
  parse_headers_from_string!(curb.header_str)

  # a final location different from the requested URL means we were re-directed
  ultimate_url = curb_last_location(curb.header_str)
  @redirected_url = ultimate_url unless ultimate_url == @url

  # populate a 'proxy' HTTPResponse object with the Curb data...
  hr_klass = Net::HTTPResponse.send('response_class', curb.response_code.to_s)
  $logger.debug("downspout | downloader | curb_http_download | Response Type : #{hr_klass.name}")

  http_info = @response_headers["HTTP"]
  @response = hr_klass.new(http_info[:version], curb.response_code, http_info[:message])

  return true if File.exist?(@path)

  $logger.error("downspout | downloader | curb_http_download | Missing File at download path : #{@path}")
  false
end
# File lib/downspout/downloader.rb, line 343
# Returns the value of the last "Location" header found in a raw header
# string (which may contain the headers of several responses when
# redirects were followed), or nil when there is none.
#
# header_string - raw response headers, lines separated by "\r\n" or "\n".
#
# The match is anchored to the start of a line so that headers such as
# "Content-Location" are not mistaken for "Location", is case-insensitive
# because HTTP header names are, and captures up to the end of the line so
# a final header without a trailing newline is not truncated.
def curb_last_location( header_string )
  return nil if header_string.nil?

  matches = header_string.scan(/^Location:(.*)$/i)
  return nil if matches.empty?

  # strip also removes the "\r" left over from CRLF line endings
  result = matches.last.first.strip
  $logger.debug("downspout | downloader | curb_last_location | #{result}")
  result
end
Extracts the file name from the Content-Disposition header, per RFC 2183 (https://tools.ietf.org/html/rfc2183).
# File lib/downspout/downloader.rb, line 396
# Extracts the file name from the Content-Disposition response header.
#
# Looks up the first response header whose name matches
# "content-disposition" (case-insensitively) and returns the value of its
# filename parameter, with optional surrounding double quotes removed.
#
# Returns nil when there are no response headers, when the header is
# absent, or when it carries no filename parameter (for example a bare
# "inline" disposition) instead of raising NoMethodError.
def file_name_from_content_disposition
  headers = response_headers || {}

  cd_key = headers.keys.find { |k| k =~ /content-disposition/i } # TODO: better to use the last?
  $logger.debug("downspout | downloader | file_name_from_content_disposition | cd key : #{cd_key}")
  return nil if cd_key.nil?

  file_name = nil
  disposition = headers[cd_key]

  if disposition
    # example : Content-Disposition: attachment; filename="iPad_User_Guide.pdf"
    # parameter names are case-insensitive, hence the /i flag
    matched = disposition.match(/filename=\"?([^;\"]+)\"?/i)
    file_name = matched[1] if matched
  end

  $logger.debug("downspout | downloader | file_name_from_content_disposition | #{file_name}")
  file_name
end
# File lib/downspout/downloader.rb, line 416
# Derives a file name from the Content-Type response header.
#
# The stem is @basename, or 'default' when @basename is not set; the
# extension is '.html' for content types containing "html" and '.pdf' for
# those containing "pdf" (html wins when both match).
#
# Returns nil when the header is missing, has no value, or names any
# other content type.
def file_name_from_content_type
  ct_key = response_headers.keys.find { |k| k =~ /content-type/i }
  return nil unless ct_key

  file_type = @response_headers[ct_key]
  return nil unless file_type

  stem = @basename || 'default'
  file_name =
    if file_type =~ /html/
      "#{stem}.html"
    elsif file_type =~ /pdf/
      "#{stem}.pdf"
    end

  $logger.debug("downspout | downloader | file_name_from_content_type | #{file_name}")
  file_name
end
# File lib/downspout/downloader.rb, line 430
# Derives a file name from the URL this download was redirected to.
#
# Returns the base name of the path of @redirected_url, or nil when no
# redirect was recorded or when the redirect URL has no usable path
# (nil, empty or just '/').
#
# Raises whatever URI.parse raises for a malformed @redirected_url.
def file_name_from_redirect
  return nil if @redirected_url.nil?

  redirect_path = URI.parse(@redirected_url).path

  if redirect_path.nil? || redirect_path.empty? || redirect_path == '/'
    # tagged with this method's own name so the log line is attributed correctly
    $logger.debug("downspout | downloader | file_name_from_redirect | Bad URI path")
    return nil
  end

  File.basename(redirect_path)
end
# File lib/downspout/downloader.rb, line 384
# Picks a file name for the download from the first source that yields
# one, trying in order: the Content-Disposition header, the redirect URL,
# and the Content-Type header. Returns nil when none of them does.
def generate_file_name
  file_name_from_content_disposition ||
    file_name_from_redirect ||
    file_name_from_content_type
end
# File lib/downspout/downloader.rb, line 165
# Finds the credential to use for the FTP host of this download.
#
# Configured credentials (Downspout::Config.credentials) whose host equals
# the lower-cased URI host are searched first, preferring one whose scheme
# is exactly "ftps" over any other whose scheme matches /ftp/. When none is
# configured, Downspout::Credential.create_from_url is asked to build one
# from @url.
#
# Returns the credential, or nil when neither source provides one.
def get_ftp_credential
  host = @uri.host.downcase
  host_credentials = Downspout::Config.credentials.select { |c| c.host == host }

  # look up the credentials for this FTP host, preferring the FTPS scheme
  cred = host_credentials.find { |c| c.scheme == "ftps" } ||
         host_credentials.find { |c| c.scheme =~ /ftp/ }

  if cred
    $logger.debug("downspout | downloader | get_ftp_credential | Loaded credentials for #{cred.host} ...")
    return cred
  end

  $logger.warn("downspout | downloader | get_ftp_credential | No established credentials found for '#{@uri.host}'.")

  # attempt to extract credential from the URL
  cred = Downspout::Credential.create_from_url( @url )

  unless cred
    $logger.warn("downspout | downloader | get_ftp_credential | No embedded credentials found in URL.")
    return nil
  end

  # only reported on this path: configured credentials are not "embedded in the URL"
  $logger.debug("downspout | downloader | get_ftp_credential | Using embedded credentials found in URL with user: #{cred.user_name}.")
  cred
end
# File lib/downspout/downloader.rb, line 191 def net_ftp_download $logger.debug("downspout | downloader | net_ftp_download | Downloading #{@url} ...") cred = get_ftp_credential if cred.nil? then # proceed anyway - slight possibility it's an un-authorized FTP account... $logger.warn("downspout | downloader | net_ftp_download | Proceeding without credentials, assuming unauthorized service ...") end begin ftp = Net::FTP.open( @uri.host ) do |ftp| ftp.login( cred.user_name, cred.pass_word ) unless cred.nil? ftp.passive ftp.chdir( File.dirname( @uri.path ) ) ftp.getbinaryfile( File.basename(@uri.path), @path ) end rescue Exception => e $logger.error("downspout | downloader | net_ftp_download | Exception : #{e}") raise e end if !(File.exist?( @path )) then $logger.error("downspout | downloader | net_ftp_download | #{basename} download failed.") return false end return true end
# File lib/downspout/downloader.rb, line 221
# Downloads @url with Net/HTTP (via net_http_fetch) and writes the
# response body to @path in binary mode.
#
# After the transfer the response status line and headers are rebuilt
# into a raw header string and handed to parse_headers_from_string!.
#
# Returns true when the response code is 200 or 202 and a file exists at
# @path; otherwise removes any file at @path (for other codes) and
# returns false. A SocketError is logged, the target file is removed, and
# the error is re-raised.
def net_http_download
  $logger.debug("downspout | downloader | net_http_download | Downloading #{@url} ...")

  begin
    response = net_http_fetch( @url )
    # File.open rather than Kernel#open: @path is always a file path and
    # must never be interpreted as a command
    File.open( @path, "wb" ) { |file| file.write( response.body ) }
  rescue SocketError => dns_err
    $logger.error("downspout | downloader | net_http_download | Net/HTTP DNS Error | #{@uri.host} | #{dns_err.inspect}")
    remove_file_at_target_path
    raise dns_err
  end

  $logger.debug("downspout | downloader | net_http_download | Response Code : #{response.code}")

  # populate the response headers from net/http headers...
  new_header_str = "HTTP/1.1 #{response.code} #{response.message}\r\n"
  response.each_header { |k, v| new_header_str << "#{k}: #{v}\r\n" }
  parse_headers_from_string!( new_header_str )

  unless [200, 202].include?( response.code.to_i )
    # missing file, failed download - delete the response body [if downloaded]
    remove_file_at_target_path
    return false
  end

  unless File.exist?( @path )
    $logger.error("downspout | downloader | net_http_download | Missing File at download path : #{@path}")
    return false
  end

  true
end
# File lib/downspout/downloader.rb, line 258
# Issues a GET for url_str with Net::HTTP, following redirects.
#
# url_str   - the URL to request.
# redirects - number of redirects followed so far (used by the recursion).
#
# HTTPS requests skip certificate verification only when
# Downspout::Config.ssl_verification? is false. Basic authentication is
# sent only when the URL itself carries user information (user:pass@host);
# no placeholder credentials are sent to other hosts.
# NOTE(review): percent-encoded user information is passed through
# as-is - confirm whether decoding is wanted.
#
# A redirect Location is resolved against the URL that produced it, so
# relative redirects stay on the same host; the resolved URL is stored in
# @redirected_url. The last response received is kept in @response.
#
# Returns the final successful response.
# Raises Downspout::BadURL when url_str is nil (including a redirect
# without a Location header), Downspout::ExcessiveRedirects when more than
# Downspout::Config.max_redirects redirects are followed,
# URI::InvalidURIError for an unparsable URL, and whatever
# Net::HTTPResponse#error! raises for any other response.
def net_http_fetch( url_str, redirects = 0 )
  $logger.debug("downspout | downloader | net_http_fetch | URL: #{url_str}, Redirects: #{redirects}.")
  raise Downspout::BadURL, 'URL is missing' if url_str.nil?

  if redirects > Downspout::Config.max_redirects
    raise Downspout::ExcessiveRedirects, 'HTTP redirect too deep'
  end

  begin
    u = URI.parse( url_str )
  rescue NoMethodError => e
    # convert to Invalid URI as that's the more pertinent issue
    raise URI::InvalidURIError, e.to_s
  end

  http = Net::HTTP.new( u.host, u.port )
  if u.scheme == "https"
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE unless Downspout::Config.ssl_verification?
  end

  my_request = Net::HTTP::Get.new( u.request_uri )

  # TODO : implement configured credentials for downloads via net_http_fetch
  my_request.basic_auth( u.user, u.password.to_s ) if u.user

  @response = http.request( my_request )

  case @response
  when Net::HTTPSuccess
    @response
  when Net::HTTPRedirection
    location = @response['location']
    # resolve relative locations against the URL that issued the redirect
    @redirected_url = location.nil? ? nil : URI.join( url_str, location ).to_s
    # TODO : use the new location to update the file name / extension when unknown
    net_http_fetch( @redirected_url, redirects + 1 )
  else
    $logger.error("downspout | downloader | net_http_fetch | Response : #{@response}")
    @response.error!
  end
end
# File lib/downspout/downloader.rb, line 354
# Parses a raw HTTP header string into a Hash and stores it in
# @response_headers.
#
# header_str - status line followed by "Name: value" lines, separated by
#              "\r\n".
#
# The result maps each header name to its value, plus an "HTTP" entry
# describing the first status line:
#   :header  - the status line itself (nil for an empty string)
#   :version - protocol version, e.g. "1.1" (nil when not recognisable)
#   :code    - status code as an Integer (0 when absent)
#   :message - the complete reason phrase, e.g. "Not Found" (nil when absent)
#
# Lines that do not look like "Name: value" are skipped. An empty or nil
# header string yields only the "HTTP" entry instead of raising.
#
# Returns the parsed Hash.
def parse_headers_from_string!( header_str )
  headers = header_str.to_s.split("\r\n")
  status_line = headers.first

  # split into at most three parts so a multi-word reason phrase stays whole
  protocol, code, reason = status_line.to_s.split(" ", 3)
  version_match = protocol.to_s.match(/HTTP\/([0-9.]+)/)
  reason = reason.strip unless reason.nil?
  reason = nil if reason && reason.empty?

  http_hash = {}
  http_hash[:header] = status_line
  http_hash[:version] = version_match && version_match[1]
  http_hash[:code] = code.to_i
  http_hash[:message] = reason

  header_hash = { "HTTP" => http_hash }

  headers.drop(1).each do |line|
    next if line.nil? || line.empty?

    begin
      matches = line.match(/([\w\-\s]+)\:\s?(.*)/)
      next if matches.nil? || matches.size < 3

      header_name, header_value = matches[1..2]
      header_hash[header_name] = header_value
    rescue StandardError => e
      $logger.warn("downspout | downloader | parse_headers_from_string! | #{line}, Exception : #{e}")
    end
  end

  @response_headers = header_hash
end
# File lib/downspout/downloader.rb, line 158
# Deletes the file at @path when one exists; does nothing otherwise.
def remove_file_at_target_path
  return unless File.exist?( @path )

  $logger.debug("downspout | downloader | remove_file_at_target_path | Removing #{@path} ... ")
  FileUtils.rm( @path )
end