class BrowserCrawler::Engine

Constants

CUPRITE_OPTIONS
REPORT_SAVE_FOLDER_PATH
SCREENSHOT_OPERATOR_OPTIONS

Attributes

crawl_manager[R]
logger[R]
report_store[R]
screenshot_operator[R]

Public Class Methods

new(browser_options: {}, screenshots_options: {}, max_pages: nil, deep_visit: false, logger: nil) click to toggle source
# File lib/browser_crawler/engine.rb, line 44
# Sets up everything the engine needs before a crawl: the screenshot
# operator, the logger, the Chrome driver registration, the report store
# and the crawl manager.
#
# browser_options     - Hash merged over CUPRITE_OPTIONS (caller values win).
# screenshots_options - Hash merged over SCREENSHOT_OPERATOR_OPTIONS
#                       (caller values win) and splatted into
#                       ScreenshotOperator.new.
# max_pages           - forwarded to initialize_crawl_manager.
# deep_visit          - forwarded to initialize_crawl_manager.
# logger              - logger to use; when nil (or false) a Logger writing
#                       to $stdout is created.
def initialize(browser_options: {},
               screenshots_options: {},
               max_pages: nil,
               deep_visit: false,
               logger: nil)
  screenshots_operator_options = SCREENSHOT_OPERATOR_OPTIONS
                                 .merge(screenshots_options)
  @screenshot_operator = ScreenshotOperator.new(**screenshots_operator_options)

  cuprite_options = CUPRITE_OPTIONS.merge(browser_options)

  # Use the $stdout global rather than the STDOUT constant so that the
  # default logger follows any redirection of standard output.
  @logger = logger || Logger.new($stdout)

  # The report store reads the screenshot operator, and the crawl manager
  # reads the report store and the logger, so this order matters.
  register_chrome_driver(cuprite_options)
  initialize_report_store(cuprite_options)
  initialize_crawl_manager(max_pages, deep_visit)
end

Public Instance Methods

after(type: :all, &hook) click to toggle source
# File lib/browser_crawler/engine.rb, line 99
# Hands the given block to HooksContainer.instance as an :after hook.
#
# type - hook type passed through unchanged (defaults to :all).
# hook - the block to store; nil when no block is given.
#
# Returns whatever HooksContainer#add_hook returns.
def after(type: :all, &hook)
  hooks = HooksContainer.instance
  hooks.add_hook(method: :after, type: type, hook: hook)
end
before(type: :all, &hook) click to toggle source
# File lib/browser_crawler/engine.rb, line 95
# Hands the given block to HooksContainer.instance as a :before hook.
#
# type - hook type passed through unchanged (defaults to :all).
# hook - the block to store; nil when no block is given.
#
# Returns whatever HooksContainer#add_hook returns.
def before(type: :all, &hook)
  hooks = HooksContainer.instance
  hooks.add_hook(method: :before, type: type, hook: hook)
end
change_page_scan_rules(&hook) click to toggle source
# File lib/browser_crawler/engine.rb, line 107
# Hands the given block to HooksContainer.instance under the :scan_rules
# hook type. No :method key is passed for this kind of hook.
#
# hook - the block to store; nil when no block is given.
#
# Returns whatever HooksContainer#add_hook returns.
def change_page_scan_rules(&hook)
  hooks = HooksContainer.instance
  hooks.add_hook(type: :scan_rules, hook: hook)
end
js_before_run(javascript: '') click to toggle source
# File lib/browser_crawler/engine.rb, line 62
# Remembers a JavaScript snippet in @javascript_before_run.
#
# javascript - String with the script source. nil or an empty string is
#              ignored: any previously stored snippet is left untouched.
#
# Returns the stored snippet, or nil when the argument was ignored.
def js_before_run(javascript: '')
  # nil is treated like "no script" instead of raising NoMethodError.
  return if javascript.nil? || javascript.empty?

  @javascript_before_run = javascript
end
report_save(folder_path: '', type: :yaml) click to toggle source
# File lib/browser_crawler/engine.rb, line 88
# Saves the current report store through ReportFactory.save.
#
# folder_path - destination folder; nil or an empty value falls back to
#               REPORT_SAVE_FOLDER_PATH.
# type        - report format; converted with #to_sym before being passed
#               on (defaults to :yaml).
#
# Returns whatever ReportFactory.save returns.
def report_save(folder_path: '', type: :yaml)
  # nil is accepted as "use the default" instead of raising NoMethodError.
  use_default = folder_path.nil? || folder_path.empty?
  save_folder_path = use_default ? REPORT_SAVE_FOLDER_PATH : folder_path

  ReportFactory.save(store: @report_store,
                     type: type.to_sym,
                     save_folder_path: save_folder_path)
end

Private Instance Methods

initialize_crawl_manager(max_pages, deep_visit) click to toggle source
# File lib/browser_crawler/engine.rb, line 147
# Builds the EngineUtilities::CrawlManager and keeps it in @crawl_manager.
#
# max_pages  - page limit; converted with #to_i, so nil becomes 0.
# deep_visit - passed through unchanged.
#
# The manager also receives the engine's report store and logger.
def initialize_crawl_manager(max_pages, deep_visit)
  manager_options = {
    report_store: report_store,
    max_pages: max_pages.to_i,
    deep_visit: deep_visit,
    logger: @logger
  }

  @crawl_manager = EngineUtilities::CrawlManager.new(**manager_options)
end
initialize_crawler(url) click to toggle source
# File lib/browser_crawler/engine.rb, line 113
# Prepares Capybara and the report store for crawling +url+.
#
# Steps, in order:
#   1. quits the current Capybara session;
#   2. points Capybara.app_host at the scheme/host/port of +url+;
#   3. starts the report store for +url+;
#   4. if a script was stored in @javascript_before_run, sends it to the
#      browser page with the 'Page.addScriptToEvaluateOnNewDocument' command.
#
# Returns nil when no script is stored, otherwise the result of the
# page command.
def initialize_crawler(url)
  Capybara.current_session.quit

  target = UrlTools.uri!(url: url)
  Capybara.app_host = "#{target.scheme}://#{target.host}:#{target.port}"

  @report_store.start(url: url)

  unless @javascript_before_run.nil?
    page = Capybara.current_session.driver.browser.page
    page.command('Page.addScriptToEvaluateOnNewDocument',
                 source: @javascript_before_run)
  end
end
initialize_report_store(cuprite_options) click to toggle source
# File lib/browser_crawler/engine.rb, line 131
# Creates a fresh Reports::Store in @report_store and seeds its metadata.
#
# cuprite_options - Hash whose :window_size entry is indexed at 0 (width)
#                   and 1 (height).
#
# Metadata written: :screenshots_path (the screenshot operator's folder),
# :window_width and :window_height.
def initialize_report_store(cuprite_options)
  @report_store = Reports::Store.new

  screenshots_folder = screenshot_operator.screenshots_folder
  @report_store.metadata[:screenshots_path] = screenshots_folder

  window_size = cuprite_options[:window_size]
  @report_store.metadata[:window_width] = window_size[0]
  @report_store.metadata[:window_height] = window_size[1]
end
register_chrome_driver(cuprite_options) click to toggle source
# File lib/browser_crawler/engine.rb, line 139
# Registers the :cuprite_chrome driver with Capybara and makes it the
# default driver.
#
# cuprite_options - options handed to Capybara.register_chrome_driver.
#
# Also turns off Capybara.run_server and Capybara.ignore_hidden_elements.
def register_chrome_driver(cuprite_options)
  driver_name = :cuprite_chrome

  Capybara.register_chrome_driver(driver_name, options: cuprite_options)
  Capybara.default_driver = driver_name
  Capybara.run_server = false
  # Workaround: keep hidden elements reachable so data can be extracted
  # from inactive tabs, dialogs, etc.
  Capybara.ignore_hidden_elements = false
end