class FacebookProfileScraper::Scraper

Public Instance Methods

scrape(username) click to toggle source
# File lib/facebook_profile_scraper/scraper.rb, line 37
# Entry point: signs in first, then scrapes the photo albums of the given
# profile.
#
# @param username [String] profile name, handed unchanged to `scrape_photos`
# @return whatever `scrape_photos` returns
def scrape(username)
  login

  scrape_photos(username)
end

Private Instance Methods

download_photo() click to toggle source
# File lib/facebook_profile_scraper/scraper.rb, line 52
# Downloads the photo currently shown in the photo viewer.
#
# Opens the viewer's dropdown and clicks its download entry. When that entry
# cannot be found, falls back to injecting a temporary `<a download>` element
# that points at the `.spotlight` image and clicking it from JavaScript.
def download_photo
  options_button = find('.fbPhotoSnowliftDropdownButton')
  options_button.click

  begin
    download_entry = find('a[data-action-type="download_photo"]', visible: true)
    download_entry.click
  rescue Capybara::ElementNotFound
    # TODO(maros): Is there a download method in Capybara `chromedriver`?
    # No download entry in the menu: trigger the download from the page itself.
    execute_script("
      link = document.createElement('a');
      link.href = document.querySelector('.spotlight').src;
      link.setAttribute('download', 'download');
      link.click();")
  end
end
login() click to toggle source
# File lib/facebook_profile_scraper/scraper.rb, line 122
# Signs in through the `/login` form and then works through any checkpoint
# screens that follow.
#
# Credentials come from the FACEBOOK_EMAIL and FACEBOOK_PASSWORD environment
# variables. While an `#approvals_code` field is on the page, the user is
# prompted on standard output and the login code is read from standard input.
#
# @raise [KeyError] if FACEBOOK_EMAIL or FACEBOOK_PASSWORD is not set
# @raise [EOFError] if standard input ends before a login code is read
# @return [nil]
def login
  # Resolve credentials before touching the browser so a missing variable
  # fails immediately instead of submitting an empty form.
  email    = ENV.fetch('FACEBOOK_EMAIL')
  password = ENV.fetch('FACEBOOK_PASSWORD')

  visit('/login')
  fill_in('email', with: email)
  fill_in('pass',  with: password)
  click_button('loginbutton')

  while has_css?('#approvals_code')
    print 'Enter your 6-digit login code: '
    # Make sure the prompt is visible before blocking on input.
    $stdout.flush

    # Read from $stdin explicitly: a bare `gets` goes through ARGF and would
    # try to open command-line arguments as files.
    code = $stdin.gets
    raise EOFError, 'standard input ended before a login code was entered' unless code

    fill_in('approvals_code', with: code.chomp)
    click_button('checkpointSubmitButton')
  end

  # Keep confirming for as long as a checkpoint submit button is present.
  click_button('checkpointSubmitButton') while has_css?('#checkpointSubmitButton')
end
scrape_album(href) click to toggle source
# File lib/facebook_profile_scraper/scraper.rb, line 67
# Downloads every photo of one album and moves the downloads into a
# directory named after the album.
#
# Steps: open the album page, scroll until no further photo links appear,
# drop the first two links, open the first remaining photo, then call
# `download_photo` and advance the viewer once per remaining link. Finally
# `tmp/downloads` (relative to the working directory) is renamed to
# `tmp/<album title>`, with the title lower-cased and spaces and path
# separators replaced by underscores.
#
# @param href [String] album location, passed to `visit`
# @return [Integer, nil] result of `File.rename`, or nil when the album has
#   no photos to download or nothing was downloaded
def scrape_album(href)
  visit(href)

  # HACK(maros): Make the backdrop for Chrome Notifications go away. Find a
  # pref for `chromedriver` to make this disabled by default.
  find('._3ixn').click

  photo_links = []

  # Get all photos to load despite infinite scroll: keep scrolling to the
  # bottom until a pass finds no more links than the previous one.
  loop do
    execute_script('window.scrollTo(0, document.body.scrollHeight);')
    links = find_link_elems_with('/photo.php')
    break if links.length == photo_links.length

    photo_links = links
  end

  # Remove the cover photo and profile photo.
  2.times { photo_links.shift }

  return if photo_links.empty?

  photo_links.first.click
  photo_links.length.times do
    download_photo

    begin
      find('.snowliftPager.next').click
    rescue Selenium::WebDriver::Error::ElementNotVisibleError
      # If the photo album has only one image, this element won't exist.
      break
    end
  end

  # Build the directory name for the album. Path separators are replaced as
  # well as spaces; otherwise a title such as "a/b" would point the rename at
  # a nonexistent subdirectory and the resulting ENOENT would be swallowed
  # below, leaving this album's files in `tmp/downloads`.
  title = find('.fbPhotoAlbumTitle').text.downcase.gsub(%r{[ /\\]}, '_')

  begin
    File.rename("#{Dir.pwd}/tmp/downloads", "#{Dir.pwd}/tmp/#{title}")
  rescue Errno::ENOENT
    # This will fail if no files were downloaded because the `tmp/downloads`
    # directory will not exist.
    nil
  end
end
scrape_photos(username) click to toggle source
# File lib/facebook_profile_scraper/scraper.rb, line 117
# Opens the profile's albums page and scrapes each album linked from it.
#
# @param username [String] profile name, interpolated into the albums URL
# @return the collection produced by `find_links_with`, as returned by `each`
def scrape_photos(username)
  visit("/#{username}/photos_albums")

  album_links = find_links_with('/media/set')
  album_links.each do |album_link|
    scrape_album(album_link)
  end
end