module BlackStack::Netting
Network
Constants
- CALL_METHOD_GET
- CALL_METHOD_POST
- DEFAULT_SSL_VERIFY_MODE
- SUCCESS
Public Class Methods
Add a parameter to the url. It doesn't validate if the param already exists.
# File lib/functions.rb, line 708
#
# Appends a query-string parameter to +url+ and returns the resulting URL.
#
# The parameter is always appended; no check is made for an existing
# parameter with the same name, so duplicates are possible.
#
# url::         String (or URI) to extend.
# param_name::  name of the parameter to append.
# param_value:: value of the parameter; converted with +to_s+ and URL-encoded.
#
# Returns the new URL as a String.
def self.add_param(url, param_name, param_value)
  uri = URI(url)
  pairs = URI.decode_www_form(uri.query || '')
  # Build the query through encode_www_form instead of concatenating
  # "&name=value" by hand: this escapes reserved characters, accepts
  # non-String values and keeps the parameter ahead of any "#fragment".
  pairs << [param_name, param_value]
  uri.query = URI.encode_www_form(pairs)
  uri.to_s
end
# File lib/functions.rb, line 587
#
# Calls a JSON API endpoint, retrying until it answers with a successful
# status or +max_retries+ attempts have been made.
#
# url::             address of the endpoint.
# params::          hash of parameters forwarded to call_post / call_get.
# method::          CALL_METHOD_POST (default) or CALL_METHOD_GET.
# ssl_verify_mode:: forwarded to call_post / call_get.
# max_retries::     maximum number of attempts.
#
# Returns the parsed JSON response of the first attempt whose 'status'
# equals BlackStack::Netting::SUCCESS (the original computed this value
# but always returned nil).
#
# Raises RuntimeError carrying the description of the last failure when no
# attempt succeeds.
def self.api_call(url, params={}, method=BlackStack::Netting::CALL_METHOD_POST, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, max_retries=5)
  attempts = 0
  last_error = ""
  while attempts < max_retries
    begin
      attempts += 1
      uri = URI(url)
      res = BlackStack::Netting::call_post(uri, params, ssl_verify_mode) if method == BlackStack::Netting::CALL_METHOD_POST
      res = BlackStack::Netting::call_get(uri, params, ssl_verify_mode) if method == BlackStack::Netting::CALL_METHOD_GET
      parsed = JSON.parse(res.body)
      return parsed if parsed['status'] == BlackStack::Netting::SUCCESS
      last_error = "Status: #{parsed['status'].to_s}. Description: #{parsed['value'].to_s}."
    rescue Errno::ECONNREFUSED => e
      # NOTE(review): to_console is not a core Exception method; presumably a
      # project extension defined elsewhere - confirm.
      last_error = "Errno::ECONNREFUSED:" + e.to_console
    rescue => e2
      # Any other failure (HTTP error, invalid JSON, unsupported method, ...)
      # is recorded and the call is retried.
      last_error = "Exception:" + e2.to_console
    end
  end
  raise last_error
end
New call_get
# File lib/functions.rb, line 517
#
# Performs an HTTP GET request and returns the Net::HTTP response.
#
# url::                  String or URI to request.
# params::               hash encoded as the query string. When it is empty
#                        the query already present in +url+ is kept.
# ssl_verify_mode::      passed to Net::HTTP as :verify_mode.
# support_redirections:: when true, one redirect is followed (the nested call
#                        is made with redirections disabled).
#
# Returns the response on success, or nil when the server answers with a
# redirect that is not followed.
# Raises whatever Net::HTTPResponse#error! raises for any other status.
def self.call_get(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, support_redirections=true)
  # URI() hands back the very same object when given a URI, so work on a
  # copy to avoid rewriting the caller's query.
  uri = URI(url).dup
  uri.query = URI.encode_www_form(params) unless params.nil? || params.empty?
  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https', :verify_mode => ssl_verify_mode) do |http|
    res = http.request(Net::HTTP::Get.new(uri))
    case res
    when Net::HTTPSuccess
      res
    when Net::HTTPRedirection
      # The original passed `false` in the ssl_verify_mode position, which left
      # support_redirections at its default. Pass both explicitly, the same
      # way call_post does. URI#merge also resolves relative Location values.
      BlackStack::Netting::call_get(uri.merge(res['location']), params, ssl_verify_mode, false) if support_redirections
    else
      res.error!
    end
  end
end
Call the API and return the result. url: valid internet address. params: hash of params to attach in the call. ssl_verify_mode: you can disable SSL verification here. max_channels: this method uses lockfiles to prevent an excessive number of API calls from each datacenter. No more than max_channels simultaneous calls are allowed. TODO: set up max_simultaneous_calls in the configuration file.
# File lib/functions.rb, line 539
#
# Performs an HTTP POST request with +params+ sent as form data and returns
# the Net::HTTP response.
#
# url::                  String or URI to post to.
# params::               hash of form fields.
# ssl_verify_mode::      passed to Net::HTTP as :verify_mode.
# support_redirections:: when true, one redirect is followed by re-posting to
#                        the Location target with redirections disabled.
#
# Returns the response on success, or nil when the server answers with a
# redirect that is not followed.
# Raises whatever Net::HTTPResponse#error! raises for any other status.
#
# TODO: lockfile-based throttling is currently disabled. The disabled code
# picked a random channel below BlackStack::Netting.max_api_call_channels,
# took an exclusive flock (File::LOCK_EX) on BlackStack::Netting.lockfiles[x]
# before the call and released it (File::LOCK_UN) afterwards.
def self.call_post(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, support_redirections=true)
  uri = URI(url)
  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https', :verify_mode => ssl_verify_mode) do |http|
    req = Net::HTTP::Post.new(uri)
    # set_form_data sets the Content-Type itself, so the 'application/json'
    # header the original assigned beforehand never reached the server.
    req.set_form_data(params)
    res = http.request(req)
    case res
    when Net::HTTPSuccess
      res
    when Net::HTTPRedirection
      # Keep the caller's ssl_verify_mode on the redirected request instead of
      # silently falling back to the default.
      BlackStack::Netting::call_post(URI(res['location']), params, ssl_verify_mode, false) if support_redirections
    else
      res.error!
    end
  end
end
Changes the value of a parameter in the url. It doesn't validate if the param already exists.
# File lib/functions.rb, line 723
#
# Sets the query-string parameter +param_name+ of +url+ to +param_value+ and
# returns the resulting URL.
#
# An existing parameter keeps its position in the query string (repeated
# occurrences collapse into one); a missing parameter is appended.
#
# url::         String (or URI) to modify.
# param_name::  name of the parameter to set.
# param_value:: new value; converted with +to_s+ and URL-encoded.
#
# Returns the new URL as a String.
def self.change_param(url, param_name, param_value)
  uri = URI(url)
  name = param_name.to_s
  # `uri.query || ''` keeps URLs without a query string from raising.
  pairs = URI.decode_www_form(uri.query || '')
  position = pairs.index { |key, _| key == name }
  pairs.reject! { |key, _| key == name }
  # The original always wrote to the hard-coded key "start", ignoring
  # param_name; write to the requested parameter instead.
  pairs.insert(position || pairs.size, [name, param_value])
  uri.query = URI.encode_www_form(pairs)
  uri.to_s
end
Download a file from an url to a local folder. url: must be somedomain.net instead of somedomain.net/, otherwise, it will throw exception. to: must be a valid path to a folder.
# File lib/functions.rb, line 619
#
# Downloads the resource at +url+ and writes the response body to disk.
#
# url:: absolute http or https URL.
# to::  destination. When it is an existing folder the file is stored inside
#       it under the last segment of the URL path; otherwise +to+ is used as
#       the path of the file to write.
#
# Returns the value of File#write for the written body.
# Raises ArgumentError when +url+ is not an http(s) URL with a host, or when
# +to+ is a folder and the URL path has no file name.
#
# NOTE: the body is written whatever the response status is, as before.
def self.download(url, to)
  uri = URI(url)
  unless uri.is_a?(URI::HTTP) && uri.host && !uri.host.empty?
    raise ArgumentError, "download: expected an absolute http(s) URL, got #{url}"
  end

  # NOTE(review): the leading 'www.' is stripped from the host that is
  # contacted, as in the original - confirm this is intended.
  domain = uri.host.start_with?('www.') ? uri.host[4..-1] : uri.host

  target = to
  if File.directory?(to)
    filename = uri.path.split("/").last
    raise ArgumentError, "download: cannot infer a file name from #{url}" if filename.nil? || filename.empty?
    target = File.join(to, filename)
  end

  # Honour the URL's port, scheme and query string; the original always made
  # a plain HTTP request on the default port and dropped the query.
  Net::HTTP.start(domain, uri.port, :use_ssl => uri.scheme == 'https') do |http|
    resp = http.get(uri.request_uri)
    # File.open instead of Kernel#open: the latter treats a leading "|" as a
    # command to run.
    File.open(target, "wb") { |file| file.write(resp.body) }
  end
end
returns the age in days of the given file
# File lib/functions.rb, line 655
#
# Returns, as a number of days (possibly fractional), the time elapsed
# between File.ctime(filename) and now.
def self.file_age(filename)
  seconds_per_day = 24 * 3600
  elapsed_seconds = Time.now - File.ctime(filename)
  elapsed_seconds / seconds_per_day
end
# File lib/functions.rb, line 760
#
# Returns the part of +email+ that follows the last "@".
#
# Raises RuntimeError when +email.email?+ is false.
# NOTE(review): email? is not a core String method; presumably a project
# extension defined elsewhere - confirm.
def self.getDomainFromEmail(email)
  raise "getDomainFromEmail: Wrong email format." unless email.email?

  email.split("@").last
end
get the domain from any url
# File lib/functions.rb, line 744
#
# Returns the host of +url+ without a leading "www.".
#
# When +url+ does not start with "http://" or "https://" (case-insensitive),
# "http://" is prepended before parsing.
#
# Raises RuntimeError when no host can be extracted from the URL.
def self.getDomainFromUrl(url)
  # \A anchors the test at the start of the string; ^ would also match at the
  # start of any later line.
  url = "http://#{url}" unless url =~ %r{\Ahttps?://}i

  # Parse once. The original parsed twice and ended with an `else return nil`
  # branch that could never run, because +url+ is never empty at this point.
  host = URI.parse(url).host
  raise "Cannot get domain for #{url}" if host.nil?

  host.sub(/\Awww\./, '')
end
# File lib/functions.rb, line 768
#
# Looks up the whois record of +domain+ and returns the distinct domains of
# the registrant, admin and tech contact emails found in it.
#
# domain:: domain name handed to Whois::Client#lookup.
# allow_heuristic_to_avoid_hosting_companies:: accepted but not used yet.
#
# Returns an Array of domain strings with no duplicates; it is empty when the
# record contains none of the three email lines.
def self.getWhoisDomains(domain, allow_heuristic_to_avoid_hosting_companies=false)
  record = Whois::Client.new.lookup(domain).to_s

  contact_patterns = [
    /Registrant Email: (#{BlackStack::Strings::MATCH_EMAIL})/,
    /Admin Email: (#{BlackStack::Strings::MATCH_EMAIL})/,
    /Tech Email: (#{BlackStack::Strings::MATCH_EMAIL})/
  ]

  found = []
  contact_patterns.each do |pattern|
    # Only the first occurrence of each contact line is considered.
    hit = record.scan(pattern).first
    next if hit.nil?
    found << BlackStack::Netting::getDomainFromEmail(hit[0].downcase)
  end

  # TODO: develop the heuristic enabled by
  # allow_heuristic_to_avoid_hosting_companies.

  # remove duplicates
  found.uniq
end
Removes the 'www.' from an URL.
# File lib/functions.rb, line 639
#
# Returns the lower-cased host of +url+ with a leading "www." removed.
# When +url+ has no scheme, "http://" is prepended before the host is read.
def self.get_host_without_www(url)
  parsed = URI.parse(url)
  parsed = URI.parse("http://#{url}") if parsed.scheme.nil?
  parsed.host.downcase.sub(/\Awww\./, '')
end
Get the final URL if a web page is redirecting.
# File lib/functions.rb, line 646
#
# Requests +url+ and returns the address it redirects to.
#
# url:: absolute URL to request.
#
# Returns the Location header of the response resolved against +url+, or
# +url+ itself when the response carries no Location header.
def self.get_redirect(url)
  uri = URI.parse(url)
  location = Net::HTTP.get_response(uri)['location']
  # Not a redirect: hand the input back instead of a truncated "scheme://host".
  return url if location.nil? || location.empty?

  # URI#merge copes with absolute and relative Location values and keeps the
  # port; the original glued the header onto "scheme://host", which corrupted
  # absolute targets.
  uri.merge(location).to_s
end
Returns the extension of the last path segment of a URL. Example: get_url_extension("connect.data.com/sitemap_index.xml?foo_param=foo_value") => ".xml"
# File lib/functions.rb, line 634
#
# Returns the file extension (leading dot included) of the path of +url+,
# or an empty string when the path has none. The query string is ignored.
def self.get_url_extension(url)
  url_path = URI.parse(url).path.to_s
  File.extname(url_path)
end
# File lib/functions.rb, line 488
#
# Returns the module-level list of lock files (the @@lockfiles class
# variable, which self.set fills in).
def self.lockfiles
  @@lockfiles
end
# File lib/functions.rb, line 484
#
# Returns the configured number of API call channels (the
# @@max_api_call_channels class variable, which self.set assigns).
def self.max_api_call_channels
  @@max_api_call_channels
end
# TODO: this function is not guaranteed to work with every redirect URL; a
# general-purpose algorithm still has to be analysed and developed.
#
# Returns the final URL behind a redirect URL. Redirect URLs do not all work
# the same way, so two strategies are applied in turn:
#   1. use the first 'Location' header of the HTTP response, if there is one;
#   2. otherwise use the 'url' parameter of the query string, if present.
# Examples of redirect URLs:
# => url = "www.google.com.ar/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0CB0QFjAAahUKEwjCg8zMsNvGAhXMMj4KHWBfA50&url=https%3A%2F%2Fwww.linkedin.com%2Fpub%2Fdavid-bell%2F5%2F76a%2F12&ei=IGalVcLzFMzl-AHgvo3oCQ&usg=AFQjCNGMbF2vRIOWsRjF-bjjoG6Nl1wg_g&sig2=ZP6ZbZxpmTHw82rIP7YYew&bvm=bv.97653015,d.cWw"
# => url = "www.google.com.ar/url?q=https://www.linkedin.com/pub/mark-greene/2/bb8/b59&sa=U&ved=0CDoQFjAIahUKEwiqivi5sdvGAhWJg5AKHSzkB5o&usg=AFQjCNGE09H9hf92mfvwPVnComssDjBBCw"
# If the URL is not a redirect URL, or anything fails along the way, the same
# URL is returned.
def get_redirect(url)
  # NOTE(review): header['Location'] is treated as a list of values here -
  # confirm against the HTTPClient API.
  locations = HTTPClient.new.get(url).header['Location']
  return locations[0] unless locations.size == 0

  embedded_url = CGI.parse(URI.parse(url).query)['url'][0]
  embedded_url.nil? ? url : embedded_url
rescue
  # Best effort: any failure (network error, URL without a query, ...) falls
  # back to the input URL.
  url
end
end¶ ↑
# returns a hash with the parameters in the url
# File lib/functions.rb, line 695 def self.params(url) # TODO: Corregir este parche: # => El codigo de abajo usa la URL de una busqueda en google. Esta url generara una excepcion cuando se intenta parsear sus parametros. # => Ejecutar las 2 lineas de abajo para verificar. # => url = "https://www.google.com/webhp#q=[lead+generation]+%22John%22+%22Greater+New+York+City+Area+*+Financial+Services%22+site:linkedin.com%2Fpub+-site:linkedin.com%2Fpub%2Fdir" # => p = CGI::parse(URI.parse(url).query) # => La linea de abajo hace un gsbub que hace que esta url siga funcionando como busqueda de google, y ademas se posible parsearla. url = url.gsub("webhp#q=", "webhp?q=") return CGI::parse(URI.parse(url).query) end
# File lib/functions.rb, line 492
#
# Configures the module: stores the number of API call channels and opens
# one lock file per channel in the current working directory.
#
# h:: hash of settings. :max_api_call_channels is the number of channels; a
#     nil value opens no lock files.
#
# Returns nil.
def self.set(h)
  # Release the handles opened by an earlier call so that reconfiguring does
  # not leak file descriptors.
  if defined?(@@lockfiles) && @@lockfiles
    @@lockfiles.each { |file| file.close unless file.closed? }
  end

  @@max_api_call_channels = h[:max_api_call_channels]
  # to_i keeps a nil setting from raising in the comparison the original loop
  # performed; a non-positive count yields an empty list.
  @@lockfiles = (0...@@max_api_call_channels.to_i).map do |i|
    File.open("./apicall.channel_#{i.to_s}.lock", "w")
  end
  nil
end
Change or add the value of a parameter in the url, depending if the parameter already exists or not.
# File lib/functions.rb, line 733
#
# Sets a query-string parameter of +url+: delegates to change_param when the
# parameter is already present and to add_param otherwise.
#
# Returns the URL produced by the method it delegates to.
def self.set_param(url, param_name, param_value)
  current = BlackStack::Netting::params(url)
  return BlackStack::Netting::change_param(url, param_name, param_value) if current.has_key?(param_name)

  BlackStack::Netting::add_param(url, param_name, param_value)
end