class Sportsflix::Utils::HTTP

Constants

DEFAULT_HEADERS
DEFAULT_USER_AGENTS

Public Class Methods

new() click to toggle source
# File lib/sportsflix/utils/http.rb, line 21
# Creates a client that starts with no extra query parameters and the
# class-wide default request headers.
def initialize
  @params  = {}
  @headers = DEFAULT_HEADERS
end

Public Instance Methods

get(raw_url, extra_params = {}, extra_headers = {}) click to toggle source
# File lib/sportsflix/utils/http.rb, line 26
# Issues an HTTP GET and hands the response to +with_cf_bypass+.
#
# The query string of +raw_url+ is replaced by the instance-wide params
# merged with +extra_params+; the request headers are the instance-wide
# headers merged with +extra_headers+ (per-call values win on clashes).
#
# @param raw_url [String] URL to request
# @param extra_params [Hash] query parameters for this call only
# @param extra_headers [Hash] request headers for this call only
# @return the value returned by +with_cf_bypass+ for the received response
def get(raw_url, extra_params = {}, extra_headers = {})
  target       = URI.parse(raw_url)
  target.query = URI.encode_www_form(@params.merge(extra_params))
  request      = Net::HTTP::Get.new(target, @headers.merge(extra_headers))

  client         = Net::HTTP.new(target.host, target.port)
  client.use_ssl = (request.uri.scheme == 'https')
  response       = client.start { |conn| conn.request(request) }

  # Lets with_cf_bypass repeat this very request against a different URL.
  repeat_request = ->(url) { get(url, extra_params, extra_headers) }

  with_cf_bypass(response, repeat_request)
end
post(raw_url, body, extra_params = {}, extra_headers = {}) click to toggle source
# File lib/sportsflix/utils/http.rb, line 40
# Issues an HTTP POST and hands the response to +with_cf_bypass+.
#
# The query string of +raw_url+ is replaced by the instance-wide params
# merged with +extra_params+. Headers are the instance-wide headers, then a
# form-urlencoded Content-Type, then +extra_headers+ (later values win).
#
# @param raw_url [String] URL to request
# @param body [String, Hash, nil] request payload; a Hash is form-encoded,
#   anything else is sent as its string representation
# @param extra_params [Hash] query parameters for this call only
# @param extra_headers [Hash] request headers for this call only
# @return the value returned by +with_cf_bypass+ for the received response
def post(raw_url, body, extra_params = {}, extra_headers = {})
  uri            = URI.parse(raw_url)
  uri.query      = URI.encode_www_form(@params.merge(extra_params))
  merged_headers = @headers
                       .merge({'Content-Type' => 'application/x-www-form-urlencoded'})
                       .merge(extra_headers)
  req            = Net::HTTP::Post.new(uri, merged_headers)

  # Attach the payload: without this the request is sent with an empty body
  # and the +body+ argument is silently ignored.
  req.body = body.is_a?(Hash) ? URI.encode_www_form(body) : body.to_s

  session         = Net::HTTP.new(uri.host, uri.port)
  session.use_ssl = true if req.uri.scheme == 'https'
  res             = session.start {|http| http.request(req)}

  # Lets with_cf_bypass repeat this very request against a different URL.
  post_lambda = lambda {|url| post(url, body, extra_params, extra_headers)}

  with_cf_bypass(res, post_lambda)
end

Private Instance Methods

needs_cf_answer(res) click to toggle source
# File lib/sportsflix/utils/http.rb, line 83
# Tells whether a response is a Cloudflare JavaScript challenge page.
#
# A response qualifies when it is a 503, its Server header mentions
# "cloudflare" and its body contains both challenge form field names.
# A missing Server header or a missing body simply means "no challenge".
#
# @param res [Net::HTTPResponse] response to inspect
# @return [Boolean]
def needs_cf_answer(res)
  return false unless res.is_a?(Net::HTTPServiceUnavailable)

  # to_s guards against an absent header / unread body (both nil).
  page = res.body.to_s

  res['Server'].to_s.include?('cloudflare') &&
      page.include?('jschl_vc') &&
      page.include?('jschl_answer')
end
solve_challenge(body) click to toggle source

Source: github.com/Anorov/cloudflare-scrape/blob/master/cfscrape/__init__.py#L115

# File lib/sportsflix/utils/http.rb, line 91
# Extracts the arithmetic snippet from a Cloudflare "I'm Under Attack Mode"
# page, evaluates it through ExecJS and returns the numeric result.
#
# The returned value does NOT include the domain-length term of the
# challenge: that term reads the DOM, which is not available in the
# sandbox, so it is stripped here and has to be accounted for by the caller.
#
# On any failure a message is printed and the process exits with status 1.
#
# @param body [String] HTML of the challenge page
# @return [String] the integer result of the challenge, as a string
def solve_challenge(body)
  # Regexp#match returns nil (it does not raise) when nothing matches.
  found = /setTimeout\(function\(\)\{\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n/.match(body.to_s)
  if found.nil?
    puts 'Unable to identify Cloudflare IUAM Javascript on website.'
    exit(1)
  end
  js = found[1]

  # Patterns must be Regexp objects: a String pattern is matched literally.
  # Reduce the final assignment to its arithmetic expression, without the
  # trailing "+ t.length" term.
  js = js.gsub(/a\.value = (.+) \+ t\.length.+/, '\1')
  # Drop the indented single-letter statements (the DOM manipulation lines).
  js = js.gsub(/\s{3,}[a-z](?: = |\.).+/, '')

  # Strip characters that could be used to exit the string context
  # These characters are not currently used in Cloudflare's arithmetic snippet
  js = js.gsub(/[\n\\']/, '')

  unless js.include?('parseInt')
    puts 'Error parsing Cloudflare IUAM Javascript challenge.'
    exit(1)
  end

  begin
    # The source starts with "return", so it is a function body and has to
    # go through ExecJS.exec; ExecJS.eval expects a bare expression.
    # NOTE(review): this relies on the selected ExecJS runtime exposing
    # Node's `require` - confirm for the runtime in use.
    js     = "return require('vm').runInNewContext('#{js}');"
    result = ExecJS.exec(js)
  rescue StandardError
    puts 'Error executing Cloudflare IUAM Javascript.'
    exit(1)
  end

  begin
    # Integer() rejects nil and non-numeric values, unlike to_i.
    result = Integer(result)
  rescue StandardError
    puts 'Cloudflare IUAM challenge returned unexpected value.'
    exit(1)
  end

  result.to_s
end
with_cf_bypass(res, req_lambda) click to toggle source
# File lib/sportsflix/utils/http.rb, line 58
# Post-processes a response: answers a Cloudflare JavaScript challenge if
# there is one, follows redirects, and otherwise returns the response as is.
#
# When a challenge is answered, the challenge parameters and a Referer
# header are merged into the instance-wide params/headers, so they are also
# sent with the requests that follow.
#
# @param res [Net::HTTPResponse] response just received
# @param req_lambda [#call] callable taking a URL string and repeating the
#   originating request against that URL
# @return the final response
def with_cf_bypass(res, req_lambda)
  if needs_cf_answer(res)
    origin = res.uri
    url    = "#{origin.scheme}://#{origin.hostname}/cdn-cgi/l/chk_jschl"

    # The expected answer is the challenge result plus the length of the
    # host name (numeric addition, not string concatenation).
    answer = solve_challenge(res.body).to_i + origin.hostname.size

    @headers = @headers.merge({Referer: origin.to_s})
    @params  = @params.merge(
        {
            # String#[] with a capture index yields the captured value
            # (or nil), not a MatchData object.
            jschl_vc:     res.body[/name="jschl_vc" value="(\w+)"/, 1],
            pass:         res.body[/name="pass" value="(.+?)"/, 1],
            jschl_answer: answer.to_s
        }
    )

    # req_lambda re-enters this method, so redirects issued by the check
    # endpoint have normally been followed already; only chase a Location
    # header if the returned response still carries one.
    cleared  = req_lambda.call(url)
    location = cleared['location']
    location ? req_lambda.call(URI.join(origin.to_s, location).to_s) : cleared
  elsif res.is_a?(Net::HTTPRedirection) && res['location']
    puts "Redirecting from #{res.uri} to #{res['location']}."
    # Resolve against the current URL so relative Location values work.
    req_lambda.call(URI.join(res.uri.to_s, res['location']).to_s)
  else
    res
  end
end