class PopularRecipes::RecipeScraper
Public Instance Methods
get_page(url)
click to toggle source
Fetches the page at the given URL and returns it as a parsed Nokogiri HTML document (a tree of nested nodes).
# File lib/scraper.rb, line 5
# Downloads the resource at +url+ and parses it with Nokogiri.
#
# URI.open is used instead of a bare +open+: since Ruby 3.0, Kernel#open no
# longer opens URLs even when open-uri is loaded. The block form lets
# open-uri close the underlying stream once parsing has finished.
#
# NOTE(review): assumes 'open-uri' and 'nokogiri' are required elsewhere in
# this file -- confirm.
#
# @param url [String] address of the page to fetch
# @return [Object] whatever Nokogiri::HTML returns for the fetched stream
def get_page(url)
  URI.open(url) { |io| Nokogiri::HTML(io) }
end
scrape_list_page()
click to toggle source
Scrapes the recipe index page and returns an array of hashes, each holding a recipe's :name and :url.
# File lib/scraper.rb, line 10
# Scrapes the recipe index page into a list of name/url pairs.
#
# Cards that have no title link, or whose link has no href, are skipped
# instead of raising NoMethodError.
#
# @param url [String] index page to scrape; defaults to the all-time
#   favourites list that was previously hard-coded
# @return [Array<Hash>] one hash per recipe card with the keys :name (title
#   text with the leading "#<rank>: " prefix removed) and :url (the link href)
def scrape_list_page(url = "http://www.geniuskitchen.com/ideas/all-time-favorite-recipes-6365?c=24106")
  cards = get_page(url).css('div.smart-info div.smart-info-wrap')

  recipes = cards.map do |card|
    links = card.css('h2.title a')
    first_link = links.first
    href = first_link && first_link['href']
    next unless href

    {
      # Titles look like "#12: Some Recipe"; strip the ranking prefix.
      :name => links.text.gsub(/#[0-9]*:\s/, ""),
      :url => href
    }
  end

  # `next` above leaves nil placeholders for skipped cards.
  recipes.compact
end
scrape_recipe_page(url)
click to toggle source
Scrapes a single recipe page and returns a hash with its author, total time, yield, rating, ingredients, and directions.
# File lib/scraper.rb, line 24
# Scrapes one recipe page into a hash of attributes used to build a recipe
# object.
#
# @param url [String] address of the recipe page
# @return [Hash] with the keys
#   :author      - text of the first byline link, or nil when there is none
#   :total_time  - time cell text with all whitespace and the "READYIN:"
#                  label removed
#   :yield       - text of the servings link
#   :rating      - screen-reader text of the star rating
#   :ingredients - Array of "quantity food" lines
#   :directions  - Array of step texts
def scrape_recipe_page(url)
  doc = get_page(url)

  # One line per ingredient: the quantity padded to a 7-character column,
  # followed by the food name with surrounding whitespace trimmed.
  ingredients = doc.css('ul.ingredient-list li').map do |element|
    quantity = element.css('span.qty').text
    food = element.css('span.food').text
    quantity.ljust(7) + food.strip
  end

  steps = doc.css('div.directions-inner.container-xs ol li').map { |element| element.text }
  # The final <li> is always dropped; the original author noted that it only
  # holds empty space. NOTE(review): confirm against the current page markup.
  directions = steps[0...-1]

  # The byline link may be absent; report nil instead of raising NoMethodError.
  author_link = doc.css('div.rating-and-author h6.byline a').first

  {
    :author => author_link && author_link.text,
    # Whitespace goes first so a label rendered as "READY IN:" collapses to
    # "READYIN:" before it is stripped.
    :total_time => doc.css('table.recipe-facts.servings_unit tbody tr td.time').text.gsub(/\s/, "").gsub(/READYIN:/, ""),
    :yield => doc.css('td.servings a span').text,
    :rating => doc.css('div.five-star span.sr-only').text,
    :ingredients => ingredients,
    :directions => directions
  }
end