module BlackStack::Netting


Network


Constants

CALL_METHOD_GET
CALL_METHOD_POST
DEFAULT_SSL_VERIFY_MODE
SUCCESS

Public Class Methods

add_param(url, param_name, param_value) click to toggle source

Add a parameter to the url. It doesn't validate if the param already exists.

# File lib/functions.rb, line 708
# Appends a query-string parameter to +url+ and returns the resulting URL
# as a String.
#
# The parameter is always appended: no check is made for an existing
# parameter with the same name, so duplicates are possible.
#
# url::         URL string (anything accepted by URI()).
# param_name::  name of the parameter to add.
# param_value:: value of the parameter; it is form-encoded on output, so it
#               may contain reserved characters or be a non-String value.
def self.add_param(url, param_name, param_value)
  uri = URI(url)
  params = URI.decode_www_form(uri.query || '')
  # Always go through the encoder: the new pair is escaped properly and is
  # placed inside the query component (before any #fragment), instead of
  # being glued to the end of the serialized URL.
  params << [param_name, param_value]
  uri.query = URI.encode_www_form(params)
  uri.to_s
end
api_call(url, params={}, method=BlackStack::Netting::CALL_METHOD_POST, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, max_retries=5) click to toggle source
# File lib/functions.rb, line 587
# Calls a JSON API endpoint, retrying on failure, and returns the parsed
# response.
#
# url::             endpoint address.
# params::          hash of parameters forwarded to call_post / call_get.
# method::          CALL_METHOD_POST or CALL_METHOD_GET. Any other value
#                   produces no response, which is reported as a failure
#                   once the retries are exhausted.
# ssl_verify_mode:: forwarded to call_post / call_get.
# max_retries::     maximum number of attempts.
#
# A call succeeds when the response body parses as JSON and its 'status'
# field equals SUCCESS. Returns the parsed JSON of the successful attempt.
# Raises RuntimeError carrying the last error description when no attempt
# succeeds.
def self.api_call(url, params={}, method=BlackStack::Netting::CALL_METHOD_POST, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, max_retries=5)
  tries = 0
  success = false
  parsed = nil
  error = ""
  while tries < max_retries && !success
    begin
      tries += 1
      uri = URI(url)
      res = nil
      res = BlackStack::Netting::call_post(uri, params, ssl_verify_mode) if method == BlackStack::Netting::CALL_METHOD_POST
      res = BlackStack::Netting::call_get(uri, params, ssl_verify_mode) if method == BlackStack::Netting::CALL_METHOD_GET
      parsed = JSON.parse(res.body)
      if parsed['status'] == BlackStack::Netting::SUCCESS
        success = true
      else
        error = "Status: #{parsed['status'].to_s}. Description: #{parsed['value'].to_s}."
      end
    rescue Errno::ECONNREFUSED => e
      error = "Errno::ECONNREFUSED:" + e.to_console
    rescue => e2
      error = "Exception:" + e2.to_console
    end
  end

  raise error unless success

  # Hand the decoded response back to the caller; previously it was
  # computed and then discarded.
  parsed
end
call_get(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, support_redirections=true) click to toggle source

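New call_get: sends an HTTP GET request to url, with params encoded as the query string, and returns the response.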

# File lib/functions.rb, line 517
# Performs an HTTP GET request and returns the response object.
#
# url::                  target address (String or URI).
# params::               hash encoded as the query string. NOTE: this
#                        replaces any query already present in +url+.
# ssl_verify_mode::      passed to Net::HTTP as :verify_mode.
# support_redirections:: when true, a redirect response is followed once.
#
# Returns the response on success. Returns nil when a redirect is received
# and +support_redirections+ is false. For any other response class,
# res.error! raises.
def self.call_get(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, support_redirections=true)
  uri = URI(url)
  uri.query = URI.encode_www_form(params)
  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https', :verify_mode => ssl_verify_mode) do |http|
    req = Net::HTTP::Get.new uri
    res = http.request req
    case res
    when Net::HTTPSuccess then res
    when Net::HTTPRedirection
      # Keep the caller's verify mode and disable further redirects, like
      # call_post does. The previous call passed `false` in the
      # ssl_verify_mode position, which clobbered the verify mode and left
      # redirect following enabled without bound.
      BlackStack::Netting::call_get(URI(res['location']), params, ssl_verify_mode, false) if support_redirections
    else
      res.error!
    end
  end
end
call_post(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, support_redirections=true) click to toggle source

Call the API and return the result. url: valid internet address. params: hash of params to attach to the call. ssl_verify_mode: you can disable SSL verification here. max_channels: this method uses lockfiles to prevent an excessive number of API calls from each datacenter. No more simultaneous calls than max_channels are allowed. TODO: set up max_simultaneous_calls in the configuration file.

# File lib/functions.rb, line 539
    # Performs an HTTP POST request with form-encoded parameters and returns
    # the response object.
    #
    # url::                  target address (String or URI).
    # params::               hash sent as the form-encoded request body.
    # ssl_verify_mode::      passed to Net::HTTP as :verify_mode.
    # support_redirections:: when true, a redirect response is followed once.
    #
    # Returns the response on success. Returns nil when a redirect is
    # received and +support_redirections+ is false. For any other response
    # class, res.error! raises.
    #
    # TODO: throttling of simultaneous calls through lockfiles
    # (BlackStack::Netting.lockfiles / max_api_call_channels) is currently
    # disabled in this method.
    def self.call_post(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, support_redirections=true)
      uri = URI(url)
      Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https', :verify_mode => ssl_verify_mode) do |http|
        req = Net::HTTP::Post.new(uri)
        # set_form_data fills the body and sets the Content-Type to
        # application/x-www-form-urlencoded, so no header is set by hand
        # (a JSON Content-Type assigned before it was always overwritten).
        req.set_form_data(params)
        res = http.request req
        case res
        when Net::HTTPSuccess then res
        when Net::HTTPRedirection
          # Follow a single hop, keeping the caller's verify mode instead of
          # silently falling back to the default one.
          BlackStack::Netting::call_post(URI(res['location']), params, ssl_verify_mode, false) if support_redirections
        else
          res.error!
        end
      end
    end
change_param(url, param_name, param_value) click to toggle source

Changes the value of a parameter in the url. It doesn't validate if the param already exists.

# File lib/functions.rb, line 723
# Sets the value of the query-string parameter +param_name+ in +url+ and
# returns the resulting URL as a String.
#
# If the parameter appears in the query, its first occurrence is given the
# new value and any further occurrences are dropped. If it does not appear
# (or the URL has no query at all), the parameter is appended.
#
# url::         URL string (anything accepted by URI()).
# param_name::  name of the parameter to change.
# param_value:: new value; it is form-encoded on output.
def self.change_param(url, param_name, param_value)
  uri = URI(url)
  replaced = false
  pairs = []
  # `uri.query || ''` keeps URLs without a query string from raising.
  URI.decode_www_form(uri.query || '').each do |name, value|
    if name == param_name
      # Use the requested name (the key was previously hard-coded to "start").
      next if replaced
      pairs << [name, param_value]
      replaced = true
    else
      pairs << [name, value]
    end
  end
  pairs << [param_name, param_value] unless replaced
  uri.query = URI.encode_www_form(pairs)
  uri.to_s
end
download(url, to) click to toggle source

Download a file from an url to a local folder. url: must be somedomain.net instead of somedomain.net/, otherwise, it will throw exception. to: must be a valid path to a folder.

# File lib/functions.rb, line 619
# Downloads the resource at +url+ and writes the response body to +to+.
#
# url:: address of the resource. A leading 'www.' is stripped from the host
#       before connecting (behavior kept from the original implementation).
# to::  path that is opened for binary writing; the body is written there.
#
# Returns the value returned by the file write.
def self.download(url, to)
  uri = URI(url)
  domain = uri.host.start_with?('www.') ? uri.host[4..-1] : uri.host
  # Request the full resource: default to "/" for an empty path and keep
  # the query string, which uri.path alone would drop.
  resource = uri.path.to_s.empty? ? '/' : uri.path
  resource = "#{resource}?#{uri.query}" if uri.query
  # Honor the URL's port and scheme instead of always using plain HTTP on
  # the default port.
  Net::HTTP.start(domain, uri.port, :use_ssl => uri.scheme == 'https') do |http|
    resp = http.get(resource)
    # File.open instead of Kernel#open: the latter treats a leading "|" in
    # the path as a command to run.
    File.open(to, "wb") { |file| file.write(resp.body) }
  end
end
file_age(filename) click to toggle source

returns the age in days of the given file

# File lib/functions.rb, line 655
# Age of +filename+ in days (fractional), measured from the file's ctime
# to the current time.
def self.file_age(filename)
  seconds_per_day = 24 * 3600
  elapsed_seconds = Time.now - File.ctime(filename)
  elapsed_seconds / seconds_per_day
end
getDomainFromEmail(email) click to toggle source
# File lib/functions.rb, line 760
# Returns the part of +email+ after the last "@".
# Raises RuntimeError when email.email? is false.
def self.getDomainFromEmail(email)
  raise "getDomainFromEmail: Wrong email format." unless email.email?

  email.split("@").last
end
getDomainFromUrl(url) click to toggle source

get the domain from any url

# File lib/functions.rb, line 744
# Returns the host of +url+ with a leading 'www.' removed.
#
# When +url+ does not start with http:// or https:// (case-insensitive),
# 'http://' is prepended before parsing.
#
# Raises RuntimeError when no host can be extracted from the URL.
def self.getDomainFromUrl(url)
  url = "http://#{url}" if url !~ /^http:\/\//i && url !~ /^https:\/\//i

  # Parse once and reuse the host. After the normalization above the URL is
  # never empty, so the former "empty url => nil" branch could not be reached
  # and has been removed.
  host = URI.parse(url).host
  raise "Cannot get domain for #{url}" if host.nil?

  host.sub(/^www\./, '')
end
getWhoisDomains(domain, allow_heuristic_to_avoid_hosting_companies=false) click to toggle source
# File lib/functions.rb, line 768
# Looks up the whois record of +domain+ and returns the unique domains of
# the Registrant, Admin and Tech contact emails found in it, in that order.
#
# allow_heuristic_to_avoid_hosting_companies:: currently has no effect.
def self.getWhoisDomains(domain, allow_heuristic_to_avoid_hosting_companies=false)
  record = Whois::Client.new.lookup(domain)
  found = []

  ['Registrant', 'Admin', 'Tech'].each do |contact|
    match = record.to_s.scan(/#{contact} Email: (#{BlackStack::Strings::MATCH_EMAIL})/).first
    next if match.nil?

    found << BlackStack::Netting::getDomainFromEmail(match[0].downcase)
  end

  # TODO: develop the heuristic enabled by
  # allow_heuristic_to_avoid_hosting_companies.

  # Drop duplicates before returning.
  found.uniq
end
get_host_without_www(url) click to toggle source

Removes the 'www.' from an URL.

# File lib/functions.rb, line 639
# Returns the lower-cased host of +url+ without a leading 'www.'.
# 'http://' is prepended first when the URL has no scheme.
def self.get_host_without_www(url)
  normalized = URI.parse(url).scheme.nil? ? "http://#{url}" : url
  hostname = URI.parse(normalized).host.downcase
  return hostname unless hostname.start_with?('www.')

  hostname[4..-1]
end
get_redirect(url) click to toggle source

Get the final URL if a web page is redirecting.

# File lib/functions.rb, line 646
# Requests +url+ and returns "<scheme>://<lower-cased host>" followed by the
# value of the response's 'location' header.
def self.get_redirect(url)
  target = URI.parse(url)
  scheme = target.scheme
  hostname = target.host.downcase
  location = Net::HTTP.get_response(target)['location']
  "#{scheme}://#{hostname}#{location}"
end
get_url_extension(url) click to toggle source

Return the extension of the last path into an URL. Example: get_url_extension(“connect.data.com/sitemap_index.xml?foo_param=foo_value”) => “.xml”

# File lib/functions.rb, line 634
# Returns the file extension (including the dot) of the path component of
# +url+, or an empty string when the path has none.
def self.get_url_extension(url)
  path = URI.parse(url).path.to_s
  File.extname(path)
end
lockfiles() click to toggle source
# File lib/functions.rb, line 488
# Returns the lockfile handles stored in @@lockfiles.
def self.lockfiles
  return @@lockfiles
end
max_api_call_channels() click to toggle source
# File lib/functions.rb, line 484
# Returns the value stored in @@max_api_call_channels.
def self.max_api_call_channels
  return @@max_api_call_channels
end
params(url) click to toggle source

# TODO: This function is not guaranteed to work with 100% of redirect URLs. This problem requires analysis and the development of a general-purpose algorithm.
# This function gets the final URL from a redirect URL.
# Not all redirect URLs work the same way.
# Below are examples. Each one works with 1 of the 2 strategies applied by this function.
# => url = “www.google.com.ar/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0CB0QFjAAahUKEwjCg8zMsNvGAhXMMj4KHWBfA50&url=https%3A%2F%2Fwww.linkedin.com%2Fpub%2Fdavid-bell%2F5%2F76a%2F12&ei=IGalVcLzFMzl-AHgvo3oCQ&usg=AFQjCNGMbF2vRIOWsRjF-bjjoG6Nl1wg_g&sig2=ZP6ZbZxpmTHw82rIP7YYew&bvm=bv.97653015,d.cWw”
# => url = “www.google.com.ar/url?q=https://www.linkedin.com/pub/mark-greene/2/bb8/b59&sa=U&ved=0CDoQFjAIahUKEwiqivi5sdvGAhWJg5AKHSzkB5o&usg=AFQjCNGE09H9hf92mfvwPVnComssDjBBCw”
# If the URL is not a redirect URL, this function returns the same URL.

# Resolves the final URL behind a redirect URL.
#
# First strategy: the 'Location' header of the response to a GET on +url+.
# Second strategy, used when that header is empty: the 'url' query parameter
# of +url+ itself. Falls back to +url+ when neither yields a value or when
# any error is raised along the way.
def get_redirect(url)
  begin
    location = HTTPClient.new.get(url).header['Location']
    return location[0] unless location.size == 0

    embedded = CGI.parse(URI.parse(url).query)['url'][0]
    embedded.nil? ? url : embedded
  rescue
    url
  end
end

end

# returns a hash with the parameters in the url

# File lib/functions.rb, line 695
def self.params(url)
  # TODO: Corregir este parche:
  # => El codigo de abajo usa la URL de una busqueda en google. Esta url generara una excepcion cuando se intenta parsear sus parametros.
  # => Ejecutar las 2 lineas de abajo para verificar.
  # => url = "https://www.google.com/webhp#q=[lead+generation]+%22John%22+%22Greater+New+York+City+Area+*+Financial+Services%22+site:linkedin.com%2Fpub+-site:linkedin.com%2Fpub%2Fdir"
  # => p = CGI::parse(URI.parse(url).query)
  # => La linea de abajo hace un gsbub que hace que esta url siga funcionando como busqueda de google, y ademas se posible parsearla.
  url = url.gsub("webhp#q=", "webhp?q=")

  return CGI::parse(URI.parse(url).query)
end
set(h) click to toggle source
# File lib/functions.rb, line 492
# Configures the API-call channels.
#
# h:: hash with the key :max_api_call_channels.
#
# Stores the given value in @@max_api_call_channels and opens one lockfile
# per channel ("./apicall.channel_<i>.lock", in write mode), keeping the
# handles in @@lockfiles. Returns nil.
def self.set(h)
  @@max_api_call_channels = h[:max_api_call_channels]
  @@lockfiles = []

  # to_i lets a missing/nil setting mean "no channels" instead of raising
  # on the comparison against nil.
  @@max_api_call_channels.to_i.times do |i|
    @@lockfiles << File.open("./apicall.channel_#{i}.lock", "w")
  end

  nil
end
set_param(url, param_name, param_value) click to toggle source

Change or add the value of a parameter in the url, depending if the parameter already exists or not.

# File lib/functions.rb, line 733
# Returns +url+ with +param_name+ set to +param_value+: delegates to
# change_param when the parameter is already present in the URL, and to
# add_param otherwise.
def self.set_param(url, param_name, param_value)
  current = BlackStack::Netting::params(url)
  return BlackStack::Netting::change_param(url, param_name, param_value) if current.key?(param_name)

  BlackStack::Netting::add_param(url, param_name, param_value)
end