class NycToday::Scraper
Public Class Methods
get_pages()
click to toggle source
# File lib/nyc_today/scraper.rb, line 6
#
# Fetches listing pages 1 through 10 of "/events/today" under @@main_url,
# parses each one with Nokogiri::HTML and appends the parsed document to
# @@pages.
#
# Returns nil. Any network or HTTP error raised while fetching propagates
# to the caller.
def self.get_pages
  (1..10).each do |num|
    page_url = @@main_url + "/events/today?page=#{num}"
    # URI.open (open-uri) instead of bare Kernel#open: Kernel#open stopped
    # handling URLs in Ruby 3.0. The block form closes the underlying IO
    # once the document has been parsed.
    @@pages << URI.open(page_url) { |io| Nokogiri::HTML(io) }
  end
  nil # the original while-loop evaluated to nil; keep that for callers
end
scrape_event_page(event)
click to toggle source
# File lib/nyc_today/scraper.rb, line 38
#
# Fetches the page at event.event_link and fills in two attributes on the
# given event:
#
# * event.price      - text of ".ds-ticket-info" with runs of whitespace
#                      collapsed to single spaces, or nil when that text
#                      is empty.
# * event.event_info - text of ".ds-event-description-inner" with a
#                      newline appended at the end of every line, or nil
#                      when the page has no such element.
#
# event must respond to event_link, price= and event_info=.
# Returns the value assigned to event.event_info.
def self.scrape_event_page(event)
  # URI.open (open-uri) instead of bare Kernel#open: Kernel#open stopped
  # handling URLs in Ruby 3.0. The block form closes the IO after parsing.
  event_page = URI.open(event.event_link) { |io| Nokogiri::HTML(io) }

  # Non-bang gsub: gsub! returns nil when nothing was substituted, which
  # used to turn a single-word price such as "Free" into nil.
  price = event_page.css(".ds-ticket-info").text.strip.gsub(/\s+/, " ")
  event.price = price.empty? ? nil : price

  event.event_info =
    if event_page.at(".ds-event-description-inner")
      # /$/ matches at the end of every line, so each line gains a "\n".
      event_page.css(".ds-event-description-inner").text.strip.gsub(/$/, "\n")
    end
end
scrape_events()
click to toggle source
# File lib/nyc_today/scraper.rb, line 16
#
# Loads the listing pages via get_pages, then builds one NycToday::Event
# per ".event-card" element found on every page in @@pages. Each event is
# created from a hash with these keys:
#
# * :event_type - the card's class attribute with the
#                 "ds-listing event-card ds-event-category-" text removed,
#                 split on "-" and capitalized word by word.
# * :name       - "<byline>: <title>" when the card has a non-empty
#                 ".ds-byline", otherwise just the title.
# * :venue      - ".ds-venue-name" text with whitespace collapsed.
# * :time       - ".dtstart" text with whitespace collapsed.
# * :time_stamp - Time.parse of :time (raises ArgumentError if the text
#                 cannot be parsed).
# * :event_link - @@main_url plus the href of the card's first link.
#
# Returns the result of NycToday::Event.reformat_types.
def self.scrape_events
  get_pages
  @@pages.each do |page|
    page.css(".event-card").each do |event|
      url_end = event.css("a").attribute("href").value
      byline = event.css(".ds-byline").text
      title = event.css(".ds-listing-event-title-text").text.lstrip

      event_hash = {}
      # Non-bang sub/gsub throughout: the bang variants return nil when no
      # substitution happens, which made the chained split/strip calls
      # raise NoMethodError (e.g. for a venue name without whitespace).
      event_hash[:event_type] = event.attr("class")
                                     .sub("ds-listing event-card ds-event-category-", "")
                                     .split("-")
                                     .map(&:capitalize)
                                     .join(" ")
      event_hash[:name] = byline.empty? ? title : "#{byline.lstrip}: #{title}"
      event_hash[:venue] = event.css(".ds-venue-name").text.gsub(/\s+/, " ").strip
      event_hash[:time] = event.css(".dtstart").text.gsub(/\s+/, " ").strip
      event_hash[:time_stamp] = Time.parse(event_hash[:time])
      event_hash[:event_link] = @@main_url + url_end
      NycToday::Event.new(event_hash)
    end
  end
  NycToday::Event.reformat_types
end