class GoogleImageScraper::Scraper
Constants
- LARGE_PICTURE_XPATH
- SEARCH_PAGE_URL_FORMAT
Public Class Methods
new()
click to toggle source
# File lib/google_image_scraper/scraper.rb, line 10
# Builds a scraper: creates the browser driver, the file saver used to
# store downloaded images, and the explicit wait used while looking up
# page elements.
def initialize
  @driver = setup_driver
  @file_saver = FileSaver.new
  # Explicit waits give up after 10 seconds.
  @wait = Selenium::WebDriver::Wait.new(timeout: 10)
end
Public Instance Methods
scrape(keyword, limit = nil)
click to toggle source
# File lib/google_image_scraper/scraper.rb, line 16
# Opens the image search page for +keyword+ and downloads the result images.
#
# keyword - search term; it is turned into a URL by search_page_url.
# limit   - maximum number of images to download, or nil (the default) to
#           keep downloading until no further thumbnails appear. A limit of
#           zero or less downloads nothing.
#
# Always returns nil.
def scrape(keyword, limit = nil)
  # Nothing to download; previously a limit of 0 became the slice end -1 and
  # therefore selected every image on the page.
  return if limit && limit <= 0

  @driver.get search_page_url(keyword)
  downloaded = 0
  loop do
    # After the first pass more elements are found than in the previous pass,
    # because the search page loads results continuously as it is scrolled.
    elements = @driver.find_elements(css: '.mJxzWe img')

    # Only thumbnails that were not handled in an earlier pass. `drop` yields
    # an empty array (never nil) even if the page now holds fewer elements.
    pending = elements.drop(downloaded)
    break if pending.empty?

    pending = pending.first(limit - downloaded) if limit
    download_images pending
    downloaded += pending.size
    break if limit && downloaded >= limit

    # Click the last thumbnail so the page (presumably by scrolling to it)
    # loads the next batch of results. This now also happens when a limit is
    # set but has not been reached yet, so limits larger than the first batch
    # are honoured.
    elements.last.click
  end
end
Private Instance Methods
download_images(html_elements)
click to toggle source
# File lib/google_image_scraper/scraper.rb, line 40
# Downloads the large version of every thumbnail in +html_elements+.
#
# Each thumbnail is clicked, the element matching LARGE_PICTURE_XPATH is
# looked up, and its +src+ attribute is handed to the file saver.
# Returns +html_elements+ (the result of +each+).
def download_images(html_elements)
  html_elements.each do |thumbnail|
    # Clicking shows the image in the side panel; pause briefly before
    # looking for the large image.
    thumbnail.click
    sleep 0.4

    large_image = @wait.until { @driver.find_element(:xpath, LARGE_PICTURE_XPATH) }
    @file_saver.save(large_image.attribute('src'))
  end
end
search_page_url(keyword)
click to toggle source
# File lib/google_image_scraper/scraper.rb, line 36
# Builds the search page URL for +keyword+ by form-encoding the keyword and
# substituting it into SEARCH_PAGE_URL_FORMAT.
def search_page_url(keyword)
  encoded_keyword = URI.encode_www_form_component(keyword)
  format(SEARCH_PAGE_URL_FORMAT, encoded_keyword)
end
setup_driver()
click to toggle source
# File lib/google_image_scraper/scraper.rb, line 54
# Creates the Chrome WebDriver used for scraping and returns it.
#
# Alternative kept from the original source (capabilities without any Chrome
# options):
#   caps = Selenium::WebDriver::Remote::Capabilities.chrome
def setup_driver
  # Command-line switches handed to Chrome through the 'chromeOptions' capability.
  chrome_args = %w[--headless --disable-gpu window-size=1280x8000]
  caps = Selenium::WebDriver::Remote::Capabilities.chrome(
    'chromeOptions' => { args: chrome_args }
  )
  Selenium::WebDriver.for(:chrome, desired_capabilities: caps)
end