class ProxyCrawl::StorageAPI

Constants

BASE_URL
INVALID_RID
INVALID_RID_ARRAY
INVALID_TOKEN
INVALID_URL_OR_RID

Attributes

body[R]
original_status[R]
pc_status[R]
rid[R]
status_code[R]
stored_at[R]
token[R]
url[R]

Public Class Methods

new(options = {}) click to toggle source
# File lib/proxycrawl/storage_api.rb, line 17
# Creates a storage API client bound to the given token.
#
# @param options [Hash] must carry a non-empty value under the :token key
# @raise INVALID_TOKEN when options[:token] is nil or empty
def initialize(options = {})
  supplied_token = options[:token]
  raise INVALID_TOKEN if supplied_token.nil? || supplied_token.empty?

  @token = supplied_token
end

Public Instance Methods

bulk(rids_array = []) click to toggle source
# File lib/proxycrawl/storage_api.rb, line 61
# Fetches several stored documents in one POST to "#{BASE_URL}/bulk".
#
# The RIDs are sent as a JSON body of the form {"rids": [...]} and the token
# as a query parameter. The response body is parsed as JSON and is expected to
# be an array of objects; each per-document attribute (original_status,
# pc_status, url, rid, stored_at) becomes an array with one entry per item.
#
# @param rids_array [Array] RIDs to fetch; must not be nil or empty
# @return [self] with body, status_code and the per-document arrays populated
# @raise INVALID_RID_ARRAY when rids_array is nil or empty
def bulk(rids_array = [])
  raise INVALID_RID_ARRAY if rids_array.nil? || rids_array.empty?

  uri = URI("#{BASE_URL}/bulk")
  uri.query = URI.encode_www_form(token: token)
  request = Net::HTTP::Post.new(uri.request_uri, 'Content-Type' => 'application/json')
  request.body = { rids: rids_array }.to_json

  # Honour the port and scheme of BASE_URL (as Net::HTTP.get_response does for
  # the GET endpoints) instead of always talking plain HTTP on the default port;
  # the block form also closes the connection when the request is done.
  response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(request)
  end

  @body = JSON.parse(response.body)
  @original_status = @body.map { |item| item['original_status'].to_i }
  @status_code = response.code.to_i
  @pc_status = @body.map { |item| item['pc_status'].to_i }
  @url = @body.map { |item| item['url'] }
  @rid = @body.map { |item| item['rid'] }
  @stored_at = @body.map { |item| item['stored_at'] }

  self
end
delete(rid) click to toggle source
# File lib/proxycrawl/storage_api.rb, line 44
# Deletes the stored document identified by +rid+ with an HTTP DELETE to
# BASE_URL, passing the token and rid as query parameters.
#
# url, original_status, pc_status and stored_at are reset to nil; status_code,
# rid and the JSON-parsed body are stored on the instance.
#
# @param rid [String] identifier of the stored document; must not be nil or empty
# @return [Boolean] true when the parsed response body has a 'success' key
# @raise INVALID_RID when rid is nil or empty
def delete(rid)
  raise INVALID_RID if rid.nil? || rid.empty?

  uri = URI(BASE_URL)
  uri.query = URI.encode_www_form(token: token, rid: rid)
  request = Net::HTTP::Delete.new(uri.request_uri)

  # Honour the port and scheme of BASE_URL (as Net::HTTP.get_response does for
  # the GET endpoints) instead of always talking plain HTTP on the default port;
  # the block form also closes the connection when the request is done.
  response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(request)
  end

  @url = @original_status = @pc_status = @stored_at = nil
  @status_code = response.code.to_i
  @rid = rid
  @body = JSON.parse(response.body)

  @body.key?('success')
end
get(url_or_rid, format = 'html') click to toggle source
# File lib/proxycrawl/storage_api.rb, line 23
# Retrieves one stored document, looked up by URL or by RID, with a GET to
# BASE_URL.
#
# When +format+ is 'json' the metadata (original_status, pc_status, url, rid,
# stored_at) is read from the parsed JSON payload; for any other format it is
# read from the response object itself via response[...]. In both cases body
# holds the raw, unparsed response body.
#
# @param url_or_rid [String] URL or RID of the document; must not be nil or empty
# @param format [String] requested format, 'html' by default
# @return [self] with the response attributes populated
# @raise INVALID_URL_OR_RID when url_or_rid is nil or empty
def get(url_or_rid, format = 'html')
  raise INVALID_URL_OR_RID if url_or_rid.nil? || url_or_rid.empty?

  endpoint = URI(BASE_URL)
  params = { token: token, format: format }.merge(decide_url_or_rid(url_or_rid))
  endpoint.query = URI.encode_www_form(params)
  response = Net::HTTP.get_response(endpoint)

  meta = if format == 'json'
           JSON.parse(response.body)
         else
           response
         end

  @original_status, @pc_status = %w[original_status pc_status].map { |key| meta[key].to_i }
  @url, @rid, @stored_at = %w[url rid stored_at].map { |key| meta[key] }

  @status_code = response.code.to_i
  @body = response.body

  self
end
rids(limit = -1) click to toggle source
# File lib/proxycrawl/storage_api.rb, line 82
# Lists stored RIDs with a GET to "#{BASE_URL}/rids".
#
# @param limit [Integer] sent as the limit query parameter when it is zero or
#   greater; the default of -1 leaves the parameter out of the request
# @return the JSON-parsed response body, which is also stored as body and rid
def rids(limit = -1)
  endpoint = URI("#{BASE_URL}/rids")
  params = { token: token }
  params[:limit] = limit if limit >= 0
  endpoint.query = URI.encode_www_form(params)

  response = Net::HTTP.get_response(endpoint)

  # Clear the per-document fields this call does not populate.
  @url = @original_status = @pc_status = @stored_at = nil
  @status_code = response.code.to_i
  @rid = @body = JSON.parse(response.body)

  @body
end
total_count() click to toggle source
# File lib/proxycrawl/storage_api.rb, line 97
# Fetches the stored-document count with a GET to "#{BASE_URL}/total_count".
#
# All per-document attributes (url, original_status, pc_status, stored_at and
# rid) are reset to nil, so a RID left over from an earlier call is not
# reported alongside this response. status_code and the JSON-parsed body are
# stored on the instance.
#
# @return the value under the 'totalCount' key of the parsed response body
def total_count
  uri = URI("#{BASE_URL}/total_count")
  uri.query = URI.encode_www_form(token: token)

  response = Net::HTTP.get_response(uri)
  @url = @original_status = @pc_status = @stored_at = @rid = nil
  @status_code = response.code.to_i
  @body = JSON.parse(response.body)

  @body['totalCount']
end

Private Instance Methods

decide_url_or_rid(url_or_rid) click to toggle source
# File lib/proxycrawl/storage_api.rb, line 112
# Builds the lookup parameter for a storage request.
#
# A value that begins with "http://" or "https://" is treated as a URL; any
# other value is treated as a RID. The pattern is anchored with \A so only the
# very start of the string counts, not the start of a later line.
#
# @param url_or_rid [String] value supplied by the caller
# @return [Hash] { url: value } or { rid: value }
def decide_url_or_rid(url_or_rid)
  if %r{\Ahttps?://}.match?(url_or_rid)
    { url: url_or_rid }
  else
    { rid: url_or_rid }
  end
end