class Sportsflix::Utils::HTTP
Constants
- DEFAULT_HEADERS
- DEFAULT_USER_AGENTS
Public Class Methods
new()
click to toggle source
# File lib/sportsflix/utils/http.rb, line 21
#
# Prepares a client with no query parameters of its own and the
# DEFAULT_HEADERS constant as its starting header set.
def initialize
  @params = {}
  @headers = DEFAULT_HEADERS
end
Public Instance Methods
get(raw_url, extra_params = {}, extra_headers = {})
click to toggle source
# File lib/sportsflix/utils/http.rb, line 26
#
# Issues an HTTP GET and returns whatever with_cf_bypass makes of the response.
#
# raw_url       - String URL to request. A query string already present in
#                 the URL is preserved.
# extra_params  - Hash of query parameters merged over the instance's @params.
# extra_headers - Hash of headers merged over the instance's @headers.
def get(raw_url, extra_params = {}, extra_headers = {})
  uri = URI.parse(raw_url)

  # Append our parameters to the query the URL already carries rather than
  # overwriting it (e.g. a redirect target handed back by with_cf_bypass),
  # and leave the query unset when there is nothing to send.
  query_parts = [uri.query, URI.encode_www_form(@params.merge(extra_params))]
  query_parts = query_parts.reject { |part| part.nil? || part.empty? }
  uri.query = query_parts.empty? ? nil : query_parts.join('&')

  req = Net::HTTP::Get.new(uri, @headers.merge(extra_headers))

  session = Net::HTTP.new(uri.host, uri.port)
  session.use_ssl = true if req.uri.scheme == 'https'

  res = session.start { |http| http.request(req) }

  # Lets with_cf_bypass repeat this GET against another URL with the same extras.
  get_lambda = lambda { |url| get(url, extra_params, extra_headers) }
  with_cf_bypass(res, get_lambda)
end
post(raw_url, body, extra_params = {}, extra_headers = {})
click to toggle source
# File lib/sportsflix/utils/http.rb, line 40
#
# Issues an HTTP POST and returns whatever with_cf_bypass makes of the response.
#
# raw_url       - String URL to request. A query string already present in
#                 the URL is preserved.
# body          - Request payload. A Hash is form-encoded (matching the default
#                 Content-Type set below); anything else is sent as its String
#                 form; nil sends no body.
# extra_params  - Hash of query parameters merged over the instance's @params.
# extra_headers - Hash of headers merged over the instance's @headers and the
#                 default form Content-Type.
def post(raw_url, body, extra_params = {}, extra_headers = {})
  uri = URI.parse(raw_url)

  # Append our parameters to the query the URL already carries rather than
  # overwriting it, and leave the query unset when there is nothing to send.
  query_parts = [uri.query, URI.encode_www_form(@params.merge(extra_params))]
  query_parts = query_parts.reject { |part| part.nil? || part.empty? }
  uri.query = query_parts.empty? ? nil : query_parts.join('&')

  merged_headers = @headers
                     .merge({'Content-Type' => 'application/x-www-form-urlencoded'})
                     .merge(extra_headers)

  req = Net::HTTP::Post.new(uri, merged_headers)
  # Attach the payload; without this the body argument was silently dropped.
  unless body.nil?
    req.body = body.is_a?(Hash) ? URI.encode_www_form(body) : body.to_s
  end

  session = Net::HTTP.new(uri.host, uri.port)
  session.use_ssl = true if req.uri.scheme == 'https'

  res = session.start { |http| http.request(req) }

  # Lets with_cf_bypass repeat this POST against another URL with the same payload.
  post_lambda = lambda { |url| post(url, body, extra_params, extra_headers) }
  with_cf_bypass(res, post_lambda)
end
Private Instance Methods
needs_cf_answer(res)
click to toggle source
# File lib/sportsflix/utils/http.rb, line 83
#
# Tells whether a response is a Cloudflare challenge page: a 503 whose Server
# header mentions "cloudflare" and whose body contains both the jschl_vc and
# jschl_answer markers.
#
# res - Net::HTTPResponse to inspect.
#
# Returns true or false.
def needs_cf_answer(res)
  return false unless res.is_a?(Net::HTTPServiceUnavailable)
  # The Server header may be missing altogether; treat that as "not Cloudflare".
  return false unless res['Server'].to_s.include?('cloudflare')

  page = res.body.to_s
  page.include?('jschl_vc') && page.include?('jschl_answer')
end
solve_challenge(body)
click to toggle source
Source: github.com/Anorov/cloudflare-scrape/blob/master/cfscrape/__init__.py#L115
# File lib/sportsflix/utils/http.rb, line 91
#
# Pulls the arithmetic JavaScript snippet out of a Cloudflare IUAM challenge
# page, evaluates it through ExecJS and returns the integer result as a String.
#
# On any failure a message is printed and the process exits with status 1.
#
# body - String HTML of the challenge page.
#
# NOTE(review): the expression kept below still ends in "+ t.length". Confirm
# that `t` resolves inside the sandbox, and how that term relates to the
# hostname length the caller appends to the answer.
def solve_challenge(body)
  bail = lambda do |message|
    puts message
    exit(1)
  end

  # Regexp#match returns nil instead of raising when the script is absent, and
  # the snippet itself is capture group 1 of the match.
  found = /setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n/.match(body.to_s)
  bail.call('Unable to identify Cloudflare IUAM Javascript on website.') if found.nil?

  # These must be Regexp patterns: String patterns are matched literally by
  # gsub, and "\1" in double quotes is the byte 0x01, not a backreference.
  js = found[1]
  js = js.gsub(/a\.value = (.+ \+ t\.length).+/, '\1')
  js = js.gsub(/\s{3,}[a-z](?: = |\.).+/, '')

  # Strip characters that could be used to exit the string context
  # These characters are not currently used in Cloudflare's arithmetic snippet
  js = js.gsub(/[\n\\']/, '')

  bail.call('Error parsing Cloudflare IUAM Javascript challenge.') unless js.include?('parseInt')

  begin
    # ExecJS.exec runs its source as a function body, so `return` is valid
    # here; ExecJS.eval expects a bare expression.
    # NOTE(review): relies on the ExecJS runtime exposing Node's `require` — confirm.
    result = ExecJS.exec("return require('vm').runInNewContext('#{js}');")
  rescue StandardError
    bail.call('Error executing Cloudflare IUAM Javascript.')
  end

  begin
    # Integer() rejects nil and non-numeric values; to_i would coerce them to 0.
    result = Integer(result)
  rescue StandardError
    bail.call('Cloudflare IUAM challenge returned unexpected value.')
  end

  result.to_s
end
with_cf_bypass(res, req_lambda)
click to toggle source
# File lib/sportsflix/utils/http.rb, line 58
#
# Post-processes a response: answers a Cloudflare challenge when
# needs_cf_answer reports one, follows a redirect otherwise, and hands back
# any other response untouched.
#
# res        - Net::HTTPResponse produced by the request just made.
# req_lambda - Callable taking a URL String and repeating the request there.
#
# Answering a challenge replaces @headers and @params with merged copies, so
# the Referer and the challenge fields stay on the instance afterwards.
def with_cf_bypass(res, req_lambda)
  if needs_cf_answer(res)
    page = res.body
    # The response object has no hostname of its own; it lives on the URI.
    host = res.uri.hostname
    url = "#{res.uri.scheme}://#{host}/cdn-cgi/l/chk_jschl"

    @headers = @headers.merge('Referer' => res.uri.to_s)
    # Store capture group 1 of each hidden field; a MatchData object would be
    # serialised as the whole matched text.
    # NOTE(review): the answer and the hostname size are joined as strings
    # here — confirm whether a numeric sum is what the challenge expects.
    @params = @params.merge(
      jschl_vc: page[/name="jschl_vc" value="(\w+)"/, 1],
      pass: page[/name="pass" value="(.+?)"/, 1],
      jschl_answer: "#{solve_challenge(page)}#{host.size}"
    )

    # req_lambda may already have followed the redirect itself, in which case
    # the response it returns carries no Location header and is the final one.
    answer = req_lambda.call(url)
    location = answer['location']
    location ? req_lambda.call(location) : answer
  elsif res.is_a?(Net::HTTPRedirection)
    puts "Redirecting from #{res.uri} to #{res['location']}."
    req_lambda.call(res['location'])
  else
    res
  end
end