class GoogleImageScraper::Scraper

Constants

LARGE_PICTURE_XPATH
SEARCH_PAGE_URL_FORMAT

Public Class Methods

new() click to toggle source
# File lib/google_image_scraper/scraper.rb, line 10
# Prepares everything a scrape needs: a browser driver, a file saver and an
# explicit-wait helper.
def initialize
  # Built by #setup_driver; the remaining collaborators are created only
  # after the driver has been set up.
  @driver = setup_driver
  # Receives every image URL found by #download_images.
  @file_saver = FileSaver.new
  # Explicit wait used while polling for the large preview image
  # (Selenium's Wait timeout is expressed in seconds).
  @wait = Selenium::WebDriver::Wait.new(timeout: 10)
end

Public Instance Methods

scrape(keyword, limit = nil) click to toggle source
# File lib/google_image_scraper/scraper.rb, line 16
# Opens the image search page for +keyword+ and downloads the images behind
# the result thumbnails.
#
# keyword - search term; it is URL-escaped by #search_page_url.
# limit   - maximum number of images to download. +nil+ (the default) means
#           "keep going until the page stops producing new thumbnails".
#           Zero or a negative number downloads nothing.
#
# Returns nil.
def scrape(keyword, limit = nil)
  # Nothing was asked for, so do not even open the page. (A limit of 0 used
  # to turn into the range end -1 and download the whole first batch.)
  return if limit && limit <= 0

  @driver.get search_page_url(keyword)
  seen = 0 # thumbnails already handed to #download_images

  loop do
    # From the second pass on, this list is longer than before because the
    # search page loads results continuously as it is scrolled.
    elements = @driver.find_elements(css: '.mJxzWe img')
    break if elements.size == seen # no new thumbnails appeared: we are done

    fresh = elements.drop(seen)
    # Never hand over more thumbnails than the caller still wants.
    fresh = fresh.first(limit - seen) if limit
    download_images fresh

    seen = elements.size
    break if limit && seen >= limit

    # Clicking the last thumbnail moves the page to the end of the list,
    # which is what makes the next batch of results load.
    elements.last.click
  end
end

Private Instance Methods

download_images(html_elements) click to toggle source
# File lib/google_image_scraper/scraper.rb, line 40
# Saves the full-size image behind each thumbnail.
#
# html_elements - thumbnail elements to process, in order.
#
# Returns the collection it was given (the result of +each+).
def download_images(html_elements)
  html_elements.each do |thumbnail|
    # Clicking a thumbnail opens its large preview; pause briefly before
    # polling so the preview has a moment to appear.
    thumbnail.click
    sleep 0.4

    preview = @wait.until { @driver.find_element(:xpath, LARGE_PICTURE_XPATH) }
    @file_saver.save(preview.attribute('src'))
  end
end
search_page_url(keyword) click to toggle source
# File lib/google_image_scraper/scraper.rb, line 36
# Builds the search page URL for +keyword+.
#
# keyword - raw search term; it is form-encoded before being substituted
#           into SEARCH_PAGE_URL_FORMAT.
#
# Returns the formatted URL String.
def search_page_url(keyword)
  escaped_keyword = URI.encode_www_form_component(keyword)
  format(SEARCH_PAGE_URL_FORMAT, escaped_keyword)
end
setup_driver() click to toggle source
# File lib/google_image_scraper/scraper.rb, line 54
# Creates the Chrome driver used for scraping.
#
# Chrome is started with the headless and GPU-disabling switches and a
# 1280x8000 window-size argument (presumably so that many thumbnails fit in
# one viewport - confirm).
#
# Returns whatever Selenium::WebDriver.for returns for :chrome.
def setup_driver
  chrome_args = %w[--headless --disable-gpu window-size=1280x8000]
  capabilities = Selenium::WebDriver::Remote::Capabilities.chrome(
    'chromeOptions' => { args: chrome_args }
  )
  # Alternative kept from the original author (no Chrome options at all):
  # capabilities = Selenium::WebDriver::Remote::Capabilities.chrome
  Selenium::WebDriver.for(:chrome, desired_capabilities: capabilities)
end