class NewsHelper::News

Attributes

headline[RW]
publish_date[RW]
url[RW]

Public Class Methods

get_news() click to toggle source
# File lib/news_helper/news.rb, line 4
# Gathers the scraped articles for every topic.
#
# @return [Array] three elements, in this order: the results of
#   scrape_tech_sites, scrape_health_sites and scrape_political_sites
def self.get_news
  [scrape_tech_sites, scrape_health_sites, scrape_political_sites]
end
scrape_bus_ins(bus_page) click to toggle source
# File lib/news_helper/news.rb, line 37
# Builds one article object per featured post found on a Business Insider page.
#
# @param bus_page [#css] parsed page (callers in this file pass a Nokogiri document)
# @return [Array] one new instance per ".river-item.featured-post" match, in page order,
#   with headline, publish_date and url filled in
def self.scrape_bus_ins(bus_page)
  bus_page.css(".river-item.featured-post").map do |post|
    title_link = post.css("a.tout-title-link")
    new.tap do |story|
      story.headline = title_link.text.strip
      # Keep only the part of the timestamp text that precedes the first "T".
      story.publish_date = post.css("span.tout-timestamp").text.split("T")[0]
      story.url = "https://www.businessinsider.com" + title_link.attribute("href").value.strip
    end
  end
end
scrape_fox_news(fox_page) click to toggle source
# File lib/news_helper/news.rb, line 61
# Builds one article object per entry in the article list of a Fox News page.
#
# @param fox_page [#css] parsed page (callers in this file pass a Nokogiri document)
# @return [Array] one new instance per "article.article" match inside
#   ".content.article-list", in page order, with headline, publish_date and url filled in
def self.scrape_fox_news(fox_page)
  # Search only inside the article list; per the original author this avoids
  # off-topic articles at the top of the page.
  listing = fox_page.css(".content.article-list")
  listing.css("article.article").map do |entry|
    new.tap do |story|
      story.headline = entry.css(".title").text.strip
      story.publish_date = entry.css(".time").text.strip
      link = entry.css(".m").css("a")
      story.url = "https://www.foxnews.com" + link.attribute("href").value.strip
    end
  end
end
scrape_google_news(google_page) click to toggle source
# File lib/news_helper/news.rb, line 49
# Builds one article object per matching story card on a Google News page.
#
# @param google_page [#css] parsed page (callers in this file pass a Nokogiri document)
# @return [Array] one new instance per ".xrnccd.F6Welf.R7GTQ.keNKEd.j7vNaf" match,
#   in page order, with headline, publish_date and url filled in
def self.scrape_google_news(google_page)
  google_page.css(".xrnccd.F6Welf.R7GTQ.keNKEd.j7vNaf").map do |card|
    new.tap do |story|
      # Only the first matching element is used for the headline and the date.
      story.headline = card.css("a.DY5T1d").first.text
      story.publish_date = card.css(".WW6dff.uQIVzc.Sksgp").first.text.strip
      story.url = "https://news.google.com" + card.css(".VDXfz").attribute("href").value.strip
    end
  end
end
scrape_health_sites() click to toggle source
# File lib/news_helper/news.rb, line 21
# Downloads the health pages of Business Insider, Google News and Fox News,
# scrapes each one and passes the three article lists to sort_news.
#
# @return [Array] whatever sort_news returns for the three scraped lists
# @raise any error raised by URI.open for a failed request (not rescued here)
def self.scrape_health_sites
  bus, google, fox = [
    "https://www.businessinsider.com/healthcare",
    "https://news.google.com/topics/CAAqIQgKIhtDQkFTRGdvSUwyMHZNR3QwTlRFU0FtVnVLQUFQAQ?hl=en-US&gl=US&ceid=US%3Aen",
    "https://www.foxnews.com/health"
  ].map do |url|
    # The block form of URI.open closes the response IO once the page is parsed,
    # instead of leaving it open until garbage collection.
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end

  sort_news(scrape_bus_ins(bus), scrape_google_news(google), scrape_fox_news(fox))
end
scrape_political_sites() click to toggle source
# File lib/news_helper/news.rb, line 29
# Downloads the politics pages of Business Insider, Google News and Fox News,
# scrapes each one and passes the three article lists to sort_news.
#
# @return [Array] whatever sort_news returns for the three scraped lists
# @raise any error raised by URI.open for a failed request (not rescued here)
def self.scrape_political_sites
  bus, google, fox = [
    "https://www.businessinsider.com/s?q=politics",
    "https://news.google.com/topics/CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFZ4ZERBU0FtVnVLQUFQAQ?hl=en-US&gl=US&ceid=US%3Aen",
    "https://www.foxnews.com/politics/"
  ].map do |url|
    # The block form of URI.open closes the response IO once the page is parsed,
    # instead of leaving it open until garbage collection.
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end

  sort_news(scrape_bus_ins(bus), scrape_google_news(google), scrape_fox_news(fox))
end
scrape_tech_sites() click to toggle source
# File lib/news_helper/news.rb, line 12
# Downloads the tech pages of Business Insider, Google News and Fox News,
# scrapes each one and passes the three article lists to sort_news.
#
# @return [Array] whatever sort_news returns for the three scraped lists
# @raise any error raised by URI.open for a failed request (not rescued here)
def self.scrape_tech_sites
  bus, google, fox = [
    "https://www.businessinsider.com/sai",
    "https://news.google.com/topics/CAAqJggKIiBDQkFTRWdvSUwyMHZNRGRqTVhZU0FtVnVHZ0pWVXlnQVAB?hl=en-US&gl=US&ceid=US%3Aen",
    "https://www.foxnews.com/tech/"
  ].map do |url|
    # The block form of URI.open closes the response IO once the page is parsed,
    # instead of leaving it open until garbage collection.
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end

  # Each page goes to the scraper written for its site: Business Insider, Google, Fox News.
  sort_news(scrape_bus_ins(bus), scrape_google_news(google), scrape_fox_news(fox))
end
sort_news(source_1, source_2, source_3) click to toggle source
# File lib/news_helper/news.rb, line 74
# Interleaves the first five entries of three article lists:
# source_1[0], source_2[0], source_3[0], source_1[1], ...
#
# A source with fewer than five entries contributes only what it has; the
# missing slots are skipped instead of being filled with nil, so every element
# of the result is a real entry.
#
# @param source_1 [Array] articles from the first site
# @param source_2 [Array] articles from the second site
# @param source_3 [Array] articles from the third site
# @return [Array] at most 15 entries (exactly 15 when every source has five or more)
def self.sort_news(source_1, source_2, source_3)
  (0...5).flat_map { |i| [source_1[i], source_2[i], source_3[i]] }.compact
end