class Scruber::QueueAdapters::AbstractAdapter::Page
Queue page wrapper
@author Ivan Goncharov
@attr [Object] id ID of page. Will be autogenerated if not passed
@attr [String] url URL of page
@attr [String] method Request method: post, get, head
@attr [String] user_agent Fixed User-Agent for requesting this page
@attr [Hash] headers Headers for requesting this page
@attr [Object] fetcher_agent_id ID of FetcherAgent assigned to this page
@attr [Object] proxy_id ID of proxy assigned to this page
@attr [String] response_body Response body
@attr [Integer] response_code Response code
@attr [Hash] response_headers Response headers
@attr [Float] response_total_time Response total time
@attr [Integer] retry_at Minimal timestamp of next retry
@attr [Integer] fetched_at Download completion timestamp
@attr [Integer] retry_count Number of download attempts
@attr [Integer] max_retry_times Max number of download attempts
@attr [Integer] enqueued_at Timestamp when the page was added to the queue
@attr [String] page_type Page type
@attr [Scruber::QueueAdapters::AbstractAdapter::Page] queue Queue object
@attr [Integer] priority Priority of page in queue for fetcher
@attr [Integer] processed_at Timestamp when the page was processed by the parser
@attr [Hash] options All options
Attributes
Public Class Methods
# File lib/scruber/queue_adapters/abstract_adapter.rb, line 60
#
# Build a page wrapper from its owning queue and a hash of attributes.
#
# The options hash is converted with +with_indifferent_access+ (presumably the
# ActiveSupport extension -- confirm) and kept in @options; every attribute is then
# read from it, falling back to the default shown below when the key is absent.
#
# @param queue [Object] queue this page belongs to; stored as-is in @queue
# @param options [Hash] page attributes
# @raise [RuntimeError] "URL not provided" when the :url key is missing
def initialize(queue, options={})
  @queue = queue
  @options = options.with_indifferent_access
  opts = @options

  # The id default hashes @options, so @options must already be assigned here.
  @id = opts.fetch(:id) { generate_page_id }
  @url = opts.fetch(:url) { raise "URL not provided" }

  # Request description
  @method = opts.fetch(:method, :get)
  @user_agent = opts.fetch(:user_agent, nil)
  @body = opts.fetch(:body, nil)
  @headers = opts.fetch(:headers, {})
  @fetcher_agent_id = opts.fetch(:fetcher_agent_id, nil)
  @proxy_id = opts.fetch(:proxy_id, nil)

  # Response data
  @response_body = opts.fetch(:response_body, nil)
  @response_code = opts.fetch(:response_code, nil)
  @response_headers = opts.fetch(:response_headers, {})
  @response_total_time = opts.fetch(:response_total_time, nil)

  # Queue bookkeeping
  @retry_at = opts.fetch(:retry_at, 0)
  @fetched_at = opts.fetch(:fetched_at, 0)
  @retry_count = opts.fetch(:retry_count, 0)
  @max_retry_times = opts.fetch(:max_retry_times, nil)
  @enqueued_at = opts.fetch(:enqueued_at, 0)
  @page_type = opts.fetch(:page_type, :seed)
  # @queue = options.fetch(:queue) { 'default' }
  @priority = opts.fetch(:priority, 0)
  @processed_at = opts.fetch(:processed_at, 0)

  # `false` means "not resolved yet" for the lazily loaded helpers below,
  # which leaves nil available as a legitimate cached value.
  @_fetcher_agent = false
  @_proxy = false
  @_redownload = false
end
Public Instance Methods
# File lib/scruber/queue_adapters/abstract_adapter.rb, line 147
#
# Hash-style read access to the page's instance variables.
#
# @param k [String, Symbol] attribute name without the leading "@"
# @return [Object, nil] value of the matching instance variable; nil when it was never set
# @raise [NameError] when "@" + k is not a valid instance variable name
def [](k)
  # String interpolation already calls #to_s on k, so no explicit coercion is needed.
  instance_variable_get("@#{k}")
end
Delete page from queue
@return [void]
# File lib/scruber/queue_adapters/abstract_adapter.rb, line 155
#
# Delete page from queue.
#
# This abstract implementation always raises; presumably concrete queue adapters
# override it with their storage-specific removal.
#
# @return [void]
# @raise [NotImplementedError] always, in this abstract class
def delete
  # Name the receiver's class and the method so the failure is self-explanatory.
  raise NotImplementedError, "#{self.class.name}#delete must be implemented by a concrete queue adapter"
end
Returns the FetcherAgent assigned to this page
@return [Scruber::Helpers::FetcherAgent] Agent object
# File lib/scruber/queue_adapters/abstract_adapter.rb, line 96
#
# FetcherAgent assigned to this page, looked up on first access and then cached.
#
# @return [Scruber::Helpers::FetcherAgent, nil] result of FetcherAgent.find for
#   @fetcher_agent_id, or nil when no fetcher_agent_id is set
def fetcher_agent
  # `false` is the "not looked up yet" marker; any other value (nil included) is a cached result.
  return @_fetcher_agent unless @_fetcher_agent == false

  @_fetcher_agent =
    if @fetcher_agent_id
      Scruber::Helpers::FetcherAgent.find(@fetcher_agent_id)
    end
end
Mark page as processed by parser and save it
@return [void]
# File lib/scruber/queue_adapters/abstract_adapter.rb, line 163
#
# Mark page as processed by parser and save it.
#
# Clears the redownload flag, stamps @processed_at with the current Unix time
# and delegates persistence to #save.
#
# @return [Object] whatever #save returns
def processed!
  @_redownload = false
  @processed_at = Time.now.to_i
  save
end
Returns the proxy assigned to this page
@return [Proxy] proxy object
# File lib/scruber/queue_adapters/abstract_adapter.rb, line 108
#
# Proxy assigned to this page, looked up on first access and then cached.
#
# @return [Object, nil] result of ProxyRotator.find for @proxy_id,
#   or nil when no proxy_id is set
def proxy
  # `false` is the "not looked up yet" marker; any other value (nil included) is a cached result.
  return @_proxy unless @_proxy == false

  @_proxy =
    if @proxy_id
      Scruber::Helpers::ProxyRotator.find(@proxy_id)
    end
end
Mark page as pending and return to queue
@param new_retry_count [Integer] new count of retries. Allows resetting the retry count
@return [void]
# File lib/scruber/queue_adapters/abstract_adapter.rb, line 175
#
# Mark page as pending and return it to the queue.
#
# Sets the redownload flag, resets the processed/fetched timestamps to 0, drops the
# stored response body, updates the retry counter and then calls #save.
#
# @param new_retry_count [Integer, nil] explicit retry count to store; when nil the
#   current counter is incremented by one
# @return [Object] whatever #save returns
def redownload!(new_retry_count=nil)
  @_redownload = true
  @processed_at = 0
  @fetched_at = 0
  @response_body = nil

  # 0 is truthy in Ruby, so an explicit 0 resets the counter. A nil counter
  # (e.g. retry_count passed explicitly as nil) is treated as 0 instead of
  # raising NoMethodError on nil + 1.
  @retry_count = new_retry_count || ((@retry_count || 0) + 1)

  save
end
# File lib/scruber/queue_adapters/abstract_adapter.rb, line 133
#
# Persist the page.
#
# This abstract implementation always raises; presumably concrete queue adapters
# override it with their storage-specific write.
#
# @return [void]
# @raise [NotImplementedError] always, in this abstract class
def save
  # Name the receiver's class and the method so the failure is self-explanatory.
  raise NotImplementedError, "#{self.class.name}#save must be implemented by a concrete queue adapter"
end
Whether the page is marked for redownloading
@return [Boolean] true if need to redownload
# File lib/scruber/queue_adapters/abstract_adapter.rb, line 193
#
# Whether the page has been marked for redownloading.
#
# @return [Boolean] true if the redownload flag is set, false otherwise
def sent_to_redownload?
  # Coerce to a strict boolean so an unset (nil) flag still yields false, as documented.
  !!@_redownload
end
Join url of current page with another path or url
@param link_url [String] link
@return [String] joined url
# File lib/scruber/queue_adapters/abstract_adapter.rb, line 143
#
# Resolve a link against this page's url.
#
# @param link_url [String] relative path or absolute url
# @return [String] result of URI.join(url, link_url) as a string
def url_join(link_url)
  joined = URI.join(url, link_url)
  joined.to_s
end
Private Instance Methods
# File lib/scruber/queue_adapters/abstract_adapter.rb, line 199
#
# Derive a page id from the request-defining options.
#
# Only the :method, :url, :headers and :body entries of @options take part, so
# two pages with the same values for those keys get the same id.
#
# @return [String] MD5 hex digest of the JSON form of the selected options
def generate_page_id
  identity = @options.slice(:method, :url, :headers, :body)
  Digest::MD5.hexdigest(identity.to_json)
end