module Maremma

Constants

ALLOWED_CONTENT_TAGS
DEFAULT_TIMEOUT
NETWORKABLE_EXCEPTIONS
VERSION

Public Class Methods

delete(url, options = {}) click to toggle source
# File lib/maremma.rb, line 43
# Sends an HTTP DELETE request to +url+.
#
# Delegates to +method+ with +options[:method]+ forced to "delete". The
# caller's +options+ hash is left untouched because +merge+ returns a copy.
def self.delete(url, options = {})
  delete_options = options.merge(method: "delete")
  method(url, delete_options)
end
faraday_conn(options = {}) click to toggle source
# File lib/maremma.rb, line 119
# Builds the Faraday connection used for a request.
#
# Options read here:
#   :headers         - hash of request headers; created (and stored back on
#                      +options+) when missing. "Content-type", "Accept" and
#                      "User-Agent" are copied onto the connection.
#   :limit           - maximum number of redirects to follow (default 10);
#                      a value of 0 or less disables the redirect middleware.
#   :ssl_self_signed - when truthy, SSL certificate verification is turned off.
#
# Returns the configured Faraday connection.
def self.faraday_conn(options = {})
  # guarantee a headers hash; this writes back to the caller's options
  options[:headers] ||= {}
  headers = options[:headers]

  redirect_limit = options[:limit] || 10

  Faraday.new do |conn|
    conn.ssl.verify = false if options[:ssl_self_signed]
    conn.options.params_encoder = Faraday::FlatParamsEncoder

    # Content-type is only set when the caller supplied a non-blank value
    content_type = headers["Content-type"]
    conn.headers["Content-type"] = content_type if content_type.present?
    conn.headers["Accept"] = headers["Accept"]
    conn.headers["User-Agent"] = headers["User-Agent"]

    if redirect_limit > 0
      conn.use FaradayMiddleware::FollowRedirects, limit: redirect_limit, cookie: :all
    end
    conn.use FaradayMiddleware::Gzip
    conn.request :multipart
    # JSON request encoding only for an exact "application/json" Accept header
    conn.request :json if headers["Accept"] == "application/json"
    conn.response :encoding
    conn.adapter :excon
  end
end
from_json(string) click to toggle source
# File lib/maremma.rb, line 269
# Parses +string+ as JSON.
#
# Returns the parsed value, or nil when +string+ is not valid JSON. Input that
# is not a String at all (for example nil) is treated the same way instead of
# letting JSON.parse's TypeError escape, so callers can always fall back to
# another parser on a nil result.
def self.from_json(string)
  JSON.parse(string)
rescue JSON::ParserError, TypeError
  nil
end
from_string(string) click to toggle source
# File lib/maremma.rb, line 275
# Tidies a plain-text body: every run of whitespace that ends in a newline is
# collapsed to a single newline, then leading and trailing whitespace is
# stripped. Returns a new string; +string+ itself is not modified.
def self.from_string(string)
  whitespace_before_newline = /\s+\n/
  collapsed = string.gsub(whitespace_before_newline, "\n")
  collapsed.strip
end
from_xml(string) click to toggle source

Keep XML attributes (see https://stackoverflow.com/a/10794044). Escape tags that are allowed in content.

# File lib/maremma.rb, line 258
# Parses +string+ as XML into a Hash.
#
# Opening and closing tags named in ALLOWED_CONTENT_TAGS are first replaced by
# their entity-escaped form so they survive as text content. NOTE: this
# replacement is done in place (gsub!), so the caller's string is modified.
#
# Returns the result of Hash.from_xml when Nokogiri reports no parse errors,
# otherwise nil.
def self.from_xml(string)
  ALLOWED_CONTENT_TAGS.each do |tag|
    escapes = { "<#{tag}>" => "&lt;#{tag}&gt;", "</#{tag}>" => "&lt;/#{tag}&gt;" }
    escapes.each { |raw, escaped| string.gsub!(raw, escaped) }
  end

  document = Nokogiri::XML(string, nil, "UTF-8")
  return nil unless document.errors.empty?

  Hash.from_xml(string)
end
get(url, options = {}) click to toggle source
# File lib/maremma.rb, line 47
# Sends an HTTP GET request to +url+.
#
# Delegates to +method+ with +options[:method]+ forced to "get". The caller's
# +options+ hash is left untouched because +merge+ returns a copy.
def self.get(url, options = {})
  get_options = options.merge(method: "get")
  method(url, get_options)
end
get_rate_limit_remaining(headers) click to toggle source

Currently supported by Twitter and GitHub, which use slightly different header names. An arbitrary high value is used if the header is not supported.

# File lib/maremma.rb, line 252
# Reads the number of remaining requests from rate-limit response headers.
#
# Looks up "X-Rate-Limit-Remaining" first, then "X-RateLimit-Remaining", and
# falls back to 100 when neither header is present. The value is converted
# with to_i.
def self.get_rate_limit_remaining(headers)
  remaining = headers["X-Rate-Limit-Remaining"]
  remaining ||= headers["X-RateLimit-Remaining"]
  remaining ||= 100
  remaining.to_i
end
head(url, options = {}) click to toggle source
# File lib/maremma.rb, line 51
# Sends an HTTP HEAD request to +url+.
#
# Delegates to +method+ with +options[:method]+ forced to "head". The caller's
# +options+ hash is left untouched because +merge+ returns a copy.
def self.head(url, options = {})
  head_options = options.merge(method: "head")
  method(url, head_options)
end
is_valid_url?(url) click to toggle source
# File lib/maremma.rb, line 141
# Checks that +url+ is an http or https URL.
#
# Returns true for a valid URL, so the predicate is usable in conditionals.
# Raises TypeError ("Invalid URL: ...") when the scheme is anything other than
# http/https, and also when the URL cannot be parsed into a URI object at all
# (e.g. a nil url), rather than failing with NoMethodError on the missing
# parse result.
def self.is_valid_url?(url)
  parsed = Addressable::URI.parse(url)
  raise TypeError, "Invalid URL: #{url}" unless parsed && %w(http https).include?(parsed.scheme)

  true
end
method(url, options = {}) click to toggle source
# File lib/maremma.rb, line 55
# Performs an HTTP request and wraps the outcome in an OpenStruct.
#
# url     - request URL; must use the http or https scheme.
# options - hash of request options. Keys read directly here:
#             :method  - "get", "post", "put", "patch", "delete" or "head"
#             :data    - request body for post/put/patch (defaults to {})
#             :timeout - request timeout (defaults to DEFAULT_TIMEOUT)
#           The hash is also passed to set_request_headers and faraday_conn,
#           and is modified in place (:data and :headers are assigned).
#
# Returns an OpenStruct with body, headers and status (plus url, except for the
# rate-limit response). Exceptions listed in NETWORKABLE_EXCEPTIONS are turned
# into an error OpenStruct instead of propagating.
def self.method(url, options = {})
  # raises TypeError unless the scheme is http or https
  is_valid_url?(url)

  # normalize url
  url = Addressable::URI.parse(url).normalize

  options[:data] ||= {}
  options[:headers] = set_request_headers(url, options)

  conn = faraday_conn(options)

  conn.options[:timeout] = options[:timeout] || DEFAULT_TIMEOUT

  # Dispatch on the requested verb. get/post/put/patch set the Host header
  # explicitly; post/put/patch also send options[:data] as the body.
  # There is no else branch: an unrecognized :method leaves response nil, and
  # the response.headers call below then raises NoMethodError.
  # NOTE(review): whether that is rescued depends on NETWORKABLE_EXCEPTIONS,
  # which is defined outside this block — confirm.
  response = case options[:method]
             when "get"
               conn.get url, {}, options[:headers] do |request|
                 request.headers["Host"] = URI.parse(url.to_s).host
               end
             when "post"
               conn.post url, {}, options[:headers] do |request|
                 request.body = options[:data]
                 request.headers["Host"] = URI.parse(url.to_s).host
               end
             when "put"
               conn.put url, {}, options[:headers] do |request|
                 request.body = options[:data]
                 request.headers["Host"] = URI.parse(url.to_s).host
               end
             when "patch"
               conn.patch url, {}, options[:headers] do |request|
                 request.body = options[:data]
                 request.headers["Host"] = URI.parse(url.to_s).host
               end
             when "delete"
               conn.delete url, {}, options[:headers]
             when "head"
               conn.head url, {}, options[:headers]
             end

  # return error if we are close to the rate limit, if supported in headers
  # (this check runs before the status checks below, so it takes precedence)
  if get_rate_limit_remaining(response.headers) < 3
    return OpenStruct.new(body: { "errors" => [{ "status" => 429, "title" => "Too many requests" }] },
                          headers: response.headers,
                          status: response.status)
  end

  # raise errors now and not in faraday_conn so that we can collect more information
  # (the rescue clause below still has access to the response object)
  raise Faraday::ConnectionFailed if response.status == 403
  raise Faraday::ResourceNotFound, "Not found" if response.status == 404
  raise Faraday::TimeoutError if response.status == 408
  raise Faraday::ClientError if response.status >= 400

  OpenStruct.new(body: parse_success_response(response.body, options),
                 headers: response.headers,
                 status: response.status,
                 url: response.env[:url].to_s)
rescue *NETWORKABLE_EXCEPTIONS => e
  # response is nil here when the exception was raised before the request
  # completed, hence the nil checks for headers and url
  error_response = rescue_faraday_error(e, response)
  OpenStruct.new(body: error_response,
                 status: error_response.fetch("errors", {}).first.fetch("status", 400),
                 headers: response ? response.headers : nil,
                 url: response ? response.env[:url].to_s : nil)
end
parse_error_response(string) click to toggle source
# File lib/maremma.rb, line 217
# Extracts an error message from an error response body.
#
# The body is run through parse_response. If the result is a Hash with a
# "hash" entry, that entry is unwrapped first. Then:
#   * a Hash with "error"  -> the value of "error"
#   * a Hash with "errors" -> the "title" of the first entry in "errors"
#   * anything else        -> the parsed value unchanged
def self.parse_error_response(string)
  parsed = parse_response(string)

  # unwrap the "hash" envelope when present
  parsed = parsed["hash"] if parsed.is_a?(Hash) && parsed["hash"]

  return parsed unless parsed.is_a?(Hash)
  return parsed["error"] if parsed["error"]
  return parsed.dig("errors", 0, "title") if parsed["errors"]

  parsed
end
parse_response(string, options = {}) click to toggle source
# File lib/maremma.rb, line 231
# Decodes a response body.
#
# string  - the raw body; nil is returned as nil instead of raising
#           NoMethodError on the encode call.
# options - :skip_encoding - when truthy, the body is not re-encoded
#           :raw           - when truthy, the (re-encoded) text is returned
#                            without any parsing
#
# Unless :skip_encoding is set, the body is encoded to UTF-8 with invalid and
# undefined characters replaced by "?". The work is done on a copy, so the
# caller's string is never modified.
#
# Returns the first truthy result of from_json, from_xml and from_string (tried
# in that order), or the text itself when :raw is set.
def self.parse_response(string, options = {})
  return nil if string.nil?

  text = string.dup
  unless options[:skip_encoding]
    text = text.encode(
      Encoding.find("UTF-8"),
      invalid: :replace,
      undef: :replace,
      replace: "?"
    )
  end
  return text if options[:raw]

  from_json(text) || from_xml(text) || from_string(text)
end
parse_success_response(string, options = {}) click to toggle source
# File lib/maremma.rb, line 201
# Normalizes a successful response body into a Hash with a "data" key.
#
# Returns nil for HEAD requests (options[:method] == "head"). Otherwise the
# body is run through parse_response and then:
#   * blank result          -> { "data" => nil }
#   * Hash with "hash" key  -> { "data" => <value of "hash"> }
#   * Hash with "data" key  -> returned unchanged
#   * anything else         -> { "data" => <parsed value> }
def self.parse_success_response(string, options = {})
  return nil if options[:method] == "head"

  parsed = parse_response(string, options)

  return { "data" => nil } if parsed.blank?
  return { "data" => parsed } unless parsed.is_a?(Hash)
  return { "data" => parsed["hash"] } if parsed["hash"]
  return parsed if parsed["data"]

  { "data" => parsed }
end
patch(url, options = {}) click to toggle source
# File lib/maremma.rb, line 39
# Sends an HTTP PATCH request to +url+.
#
# Delegates to +method+ with +options[:method]+ forced to "patch". The caller's
# +options+ hash is left untouched because +merge+ returns a copy.
def self.patch(url, options = {})
  patch_options = options.merge(method: "patch")
  method(url, patch_options)
end
post(url, options = {}) click to toggle source

ActiveSupport::XmlMini.backend = "Nokogiri"

# File lib/maremma.rb, line 31
# Sends an HTTP POST request to +url+.
#
# Delegates to +method+ with +options[:method]+ forced to "post". The caller's
# +options+ hash is left untouched because +merge+ returns a copy.
def self.post(url, options = {})
  post_options = options.merge(method: "post")
  method(url, post_options)
end
put(url, options = {}) click to toggle source
# File lib/maremma.rb, line 35
# Sends an HTTP PUT request to +url+.
#
# Delegates to +method+ with +options[:method]+ forced to "put". The caller's
# +options+ hash is left untouched because +merge+ returns a copy.
def self.put(url, options = {})
  put_options = options.merge(method: "put")
  method(url, put_options)
end
rescue_faraday_error(error, response) click to toggle source
# File lib/maremma.rb, line 184
# Converts a rescued exception into an errors hash of the form
#   { "errors" => [{ "status" => <code>, "title" => <message> }] }
#
# error    - the rescued exception
# response - the HTTP response if one was received, otherwise nil
#
# Checks are applied in this order:
#   1. Faraday::ResourceNotFound                          -> 404 "Not found"
#   2. 401 in the error message or in error.response      -> 401 "Unauthorized"
#   3. Faraday::ConnectionFailed                          -> 403, parsed error message
#   4. Faraday::TimeoutError or 408 in error.response     -> 408 "Request timeout"
#   5. otherwise the response's status and parsed body, or 400 and the parsed
#      error message when there is no response
def self.rescue_faraday_error(error, response)
  as_errors = ->(status, title) { { "errors" => [{ "status" => status, "title" => title }] } }
  # status carried by the exception itself, looked up only when needed
  attached_status = -> { error.try(:response) && error.response[:status] }

  return as_errors.call(404, "Not found") if error.is_a?(Faraday::ResourceNotFound)

  if error.message == "the server responded with status 401" || attached_status.call == 401
    return as_errors.call(401, "Unauthorized")
  end

  return as_errors.call(403, parse_error_response(error.message)) if error.is_a?(Faraday::ConnectionFailed)

  if error.is_a?(Faraday::TimeoutError) || attached_status.call == 408
    return as_errors.call(408, "Request timeout")
  end

  if response
    as_errors.call(response.status, parse_error_response(response.body))
  else
    as_errors.call(400, parse_error_response(error.message))
  end
end
set_request_headers(_url, options = {}) click to toggle source
# File lib/maremma.rb, line 146
# Builds the request headers for a call.
#
# _url    - unused
# options - :headers      - existing headers hash; created and stored back on
#                           +options+ when missing, and updated in place
#           :content_type - "html", "xml", "json" shortcut or a literal value
#           :accept       - "html", "xml", "json" shortcut or a literal value
#           :bearer, :token, :github_token, :username/:password
#                         - credentials; the first one present (in that order)
#                           determines the Authorization header
#
# Returns the headers hash (the same object as options[:headers]).
def self.set_request_headers(_url, options = {})
  media_types = { "html" => "text/html;charset=UTF-8",
                  "xml" => "application/xml;charset=UTF-8",
                  "json" => "application/json;charset=UTF-8" }
  # shortcuts expand to full media types; unknown values pass through as given
  expand = ->(value) { media_types.fetch(value, value) }

  headers = (options[:headers] ||= {})

  # the USER_AGENT environment variable overrides the built-in user agent
  headers["User-Agent"] = ENV["USER_AGENT"] || "Mozilla/5.0 (compatible; Maremma/#{Maremma::VERSION}; +https://github.com/datacite/maremma)"

  content_type = options[:content_type]
  headers["Content-type"] = expand.call(content_type) if content_type.present?

  accept = options[:accept]
  if accept.present?
    headers["Accept"] = expand.call(accept)
  else
    # accept all content, unless the caller already supplied an Accept header
    headers["Accept"] ||= "text/html,application/json,application/xml;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5"
  end

  authorization =
    if options[:bearer].present?
      "Bearer #{options[:bearer]}"
    elsif options[:token].present?
      "Token token=#{options[:token]}"
    elsif options[:github_token].present?
      # GitHub uses a different format for token authentication
      "Token #{options[:github_token]}"
    elsif options[:username].present?
      credentials = Base64.strict_encode64("#{options[:username]}:#{options[:password]}").chomp
      "Basic #{credentials}"
    end
  headers["Authorization"] = authorization if authorization

  headers
end