class Scruber::QueueAdapters::Memory

Memory Queue Adapter

Simple queue adapter which stores pages in memory. Nice solution for small scrapes. Easy to use. No need to setup any database, but no ability to reparse pages if something went wrong.

@author Ivan Goncharov

Attributes

error_pages[R]

Public Class Methods

new(options={}) click to toggle source

Queue initializer @param options [Hash] See {Scruber::QueueAdapters::AbstractAdapter#initializer}

@return [Scruber::QueueAdapters::Memory] class instance

# File lib/scruber/queue_adapters/memory.rb, line 58
def initialize(options={})
  super(options)
  @processed_ids = []
  @queue = []
  @downloaded_pages = []
  @error_pages = []
end

Public Instance Methods

add(url_or_page, options={}) click to toggle source

Add page to queue @param url_or_page [String|Page] URL of page or Page object @param options [Hash] Other options, see {Scruber::QueueAdapters::AbstractAdapter::Page}

@return [void]

# File lib/scruber/queue_adapters/memory.rb, line 72
def add(url_or_page, options={})
  unless url_or_page.is_a?(Page)
    url_or_page = Page.new(self, options.merge(url: url_or_page))
  end
  @queue.push(url_or_page) unless @processed_ids.include?(url_or_page.id) || find(url_or_page.id)
end
Also aliased as: push
add_downloaded(page) click to toggle source

Internal method to add page to downloaded queue

@param page [Scruber::QueueAdapters::Memory::Page] page

@return [void]

# File lib/scruber/queue_adapters/memory.rb, line 156
def add_downloaded(page)
  @downloaded_pages.push page
end
add_error_page(page) click to toggle source

Internal method to add page to error queue

@param page [Scruber::QueueAdapters::Memory::Page] page

@return [void]

# File lib/scruber/queue_adapters/memory.rb, line 166
def add_error_page(page)
  @error_pages.push page
end
add_processed_page(page) click to toggle source

Saving processed page id to prevent adding identical pages to queue

@param page [Page] page

@return [void]

# File lib/scruber/queue_adapters/memory.rb, line 177
def add_processed_page(page)
  @processed_ids.push page.id
end
delete(page) click to toggle source

Delete page from all internal queues

@param page [Scruber::QueueAdapters::Memory::Page] page

@return [void]

# File lib/scruber/queue_adapters/memory.rb, line 196
def delete(page)
  @queue -= [page]
  @downloaded_pages -= [page]
  @error_pages -= [page]
end
downloaded_count() click to toggle source

Count of downloaded pages. Used to show downloading progress.

@return [Integer] count of downloaded pages

# File lib/scruber/queue_adapters/memory.rb, line 107
def downloaded_count
  @downloaded_pages.count
end
fetch_downloaded(count=nil) click to toggle source

Fetch downloaded and not processed pages for fetching @param count [Integer] count of pages to fetch

@return [Scruber::QueueAdapters::AbstractAdapter::Page|Array<Scruber::QueueAdapters::AbstractAdapter::Page>] page if count = nil, or array of pages if count > 0

# File lib/scruber/queue_adapters/memory.rb, line 116
def fetch_downloaded(count=nil)
  if count.nil?
    @downloaded_pages.shift
  else
    @downloaded_pages.shift(count)
  end
end
fetch_error(count=nil) click to toggle source

Fetch error page @param count [Integer] count of pages to fetch

@return [Scruber::QueueAdapters::AbstractAdapter::Page|Array<Scruber::QueueAdapters::AbstractAdapter::Page>] page if count = nil, or array of pages if count > 0

# File lib/scruber/queue_adapters/memory.rb, line 129
def fetch_error(count=nil)
  if count.nil?
    @error_pages.shift
  else
    @error_pages.shift(count)
  end
end
fetch_pending(count=nil) click to toggle source

Fetch pending page for fetching @param count [Integer] count of pages to fetch

@return [Scruber::QueueAdapters::AbstractAdapter::Page|Array<Scruber::QueueAdapters::AbstractAdapter::Page>] page if count = nil, or array of pages if count > 0

# File lib/scruber/queue_adapters/memory.rb, line 142
def fetch_pending(count=nil)
  if count.nil?
    @queue.shift
  else
    @queue.shift(count)
  end
end
find(id) click to toggle source

Search page by id @param id [Object] id of page

@return [Page] page

# File lib/scruber/queue_adapters/memory.rb, line 85
def find(id)
  [@queue, @downloaded_pages, @error_pages].each do |q|
    q.each do |i|
      return i if i.id == id
    end
  end
  nil
end
has_work?() click to toggle source

Used by Core. It checks for pages that are not downloaded or not parsed yet.

@return [Boolean] true if queue still has work for scraper

# File lib/scruber/queue_adapters/memory.rb, line 186
def has_work?
  @queue.count > 0 || @downloaded_pages.count > 0
end
initialized?() click to toggle source

Check if queue was initialized. Used by the `seed` method: if the queue was already initialized, there is no need to run the seed block.

@return [Boolean] true if queue already was initialized

# File lib/scruber/queue_adapters/memory.rb, line 208
def initialized?
  @queue.present? || @downloaded_pages.present? || @error_pages.present?
end
push(url_or_page, options={})
Alias for: add
size() click to toggle source

Size of queue

@return [Integer] count of pages in queue

# File lib/scruber/queue_adapters/memory.rb, line 98
def size
  @queue.count
end