class FacebookProfileScraper::Scraper
Public Instance Methods
scrape(username)
click to toggle source
# File lib/facebook_profile_scraper/scraper.rb, line 37
# Public entry point: logs in, then scrapes the photo albums of the given
# profile.
#
# @param username [String] profile name, interpolated into the
#   "/<username>/photos_albums" path by scrape_photos
def scrape(username)
  login
  scrape_photos(username)
end
Private Instance Methods
download_photo()
click to toggle source
# File lib/facebook_profile_scraper/scraper.rb, line 52
# Downloads the photo currently shown in the photo viewer.
#
# Opens the viewer's dropdown menu and clicks its download entry. When that
# entry cannot be found, falls back to injecting an anchor with a `download`
# attribute that points at the `.spotlight` element's `src` and clicking it.
def download_photo
  find('.fbPhotoSnowliftDropdownButton').click

  begin
    download_entry = find('a[data-action-type="download_photo"]', visible: true)
    download_entry.click
  rescue Capybara::ElementNotFound
    # TODO(maros): Is there a download method in Capybara `chromedriver`?
    fallback_js = " link = document.createElement('a');" \
                  " link.href = document.querySelector('.spotlight').src;" \
                  " link.setAttribute('download', 'download');" \
                  ' link.click();'
    execute_script(fallback_js)
  end
end
find_link_elems_with(href)
click to toggle source
# File lib/facebook_profile_scraper/scraper.rb, line 44
# Returns every anchor element on the current page whose `href` contains the
# given substring.
#
# The `href` of each element is read exactly once: each read goes through the
# driver, and this method is called repeatedly while an album is scrolled.
#
# @param href [String] substring to look for in each anchor's href
# @return [Array] the matching anchor elements, in page order
def find_link_elems_with(href)
  all('a').select do |elem|
    target = elem[:href]
    target && target.include?(href)
  end
end
find_links_with(href)
click to toggle source
# File lib/facebook_profile_scraper/scraper.rb, line 48
# Collects the distinct href values of all anchors whose href contains the
# given substring.
#
# @param href [String] substring to look for in each anchor's href
# @return [Array] the matching href values with duplicates removed
def find_links_with(href)
  matching_elems = find_link_elems_with(href)
  hrefs = matching_elems.map { |anchor| anchor[:href] }
  hrefs.uniq
end
login()
click to toggle source
# File lib/facebook_profile_scraper/scraper.rb, line 122
# Logs in through the '/login' form using the FACEBOOK_EMAIL and
# FACEBOOK_PASSWORD environment variables, prompting on the terminal for a
# login code for as long as the approvals-code field is shown, then clicking
# through any remaining checkpoint screens.
#
# @raise [KeyError] if FACEBOOK_EMAIL or FACEBOOK_PASSWORD is not set
# @raise [EOFError] if a login code is required but standard input is closed
def login
  # Resolve credentials up front so a missing variable fails with a clear
  # KeyError before the browser is touched, instead of submitting nil.
  email = ENV.fetch('FACEBOOK_EMAIL')
  password = ENV.fetch('FACEBOOK_PASSWORD')

  visit('/login')
  fill_in('email', with: email)
  fill_in('pass', with: password)
  click_button('loginbutton')

  while has_css?('#approvals_code')
    print 'Enter your 6-digit login code: '
    # Read from $stdin explicitly: a bare `gets` is Kernel#gets, which reads
    # from the files named in ARGV whenever ARGV is non-empty.
    code = $stdin.gets
    raise EOFError, 'standard input closed before a login code was entered' unless code

    fill_in('approvals_code', with: code.chomp)
    click_button('checkpointSubmitButton')
  end

  click_button('checkpointSubmitButton') while has_css?('#checkpointSubmitButton')
end
scrape_album(href)
click to toggle source
# File lib/facebook_profile_scraper/scraper.rb, line 67
# Visits one album page, downloads every photo in it, then moves the
# `tmp/downloads` directory (relative to the working directory) to a
# directory named after the album.
#
# @param href [String] link to the album page
def scrape_album(href)
  visit(href)

  # HACK(maros): Make the backdrop for Chrome Notifications go away. Find a
  # pref for `chromedriver` to make this disabled by default.
  find('._3ixn').click

  photo_links = []
  # Get all photos to load despite infinite scroll: keep scrolling to the
  # bottom until the number of photo links stops changing.
  loop do
    execute_script('window.scrollTo(0, document.body.scrollHeight);')
    links = find_link_elems_with('/photo.php')
    break if links.length == photo_links.length

    photo_links = links
  end

  # Remove the cover photo and profile photo.
  photo_links.shift(2)
  return if photo_links.empty?

  photo_links.first.click
  photo_links.length.times do
    download_photo
    begin
      find('.snowliftPager.next').click
    # If the photo album has only one image, this element won't exist.
    rescue Selenium::WebDriver::Error::ElementNotVisibleError
      break
    end
  end

  # Add directory name for album.
  move_downloads_to_album_dir(album_dir_name(find('.fbPhotoAlbumTitle').text))
end

# Turns an album title into a single, safe directory name.
#
# Spaces become underscores as before; path separators are replaced as well,
# so a title such as "Trip/2019" cannot produce a nested or escaping path.
# Blank or dots-only titles ("", ".", "..") fall back to a fixed name.
def album_dir_name(title)
  name = title.downcase.gsub(%r{[ /\\]}, '_')
  name =~ /\A\.*\z/ ? 'untitled_album' : name
end

# Renames `tmp/downloads` to `tmp/<dir_name>`, adding a numeric suffix when
# that name is already taken (e.g. two albums with the same title).
#
# Does nothing when `tmp/downloads` does not exist, which is the case when no
# files were downloaded.
def move_downloads_to_album_dir(dir_name)
  tmp_root = File.join(Dir.pwd, 'tmp')
  downloads = File.join(tmp_root, 'downloads')
  return unless Dir.exist?(downloads)

  target = File.join(tmp_root, dir_name)
  suffix = 1
  while File.exist?(target)
    suffix += 1
    target = File.join(tmp_root, "#{dir_name}_#{suffix}")
  end
  File.rename(downloads, target)
end
scrape_photos(username)
click to toggle source
# File lib/facebook_profile_scraper/scraper.rb, line 117
# Opens the profile's albums page and scrapes every album linked from it.
#
# @param username [String] profile name used to build the
#   "/<username>/photos_albums" path
def scrape_photos(username)
  albums_path = "/#{username}/photos_albums"
  visit(albums_path)

  album_links = find_links_with('/media/set')
  album_links.each do |album_href|
    scrape_album(album_href)
  end
end