class Crawlers::Rss
Public Class Methods
new(rss_url)
click to toggle source
# File lib/crawlers/rss.rb, line 10

# Builds a crawler for a single feed.
#
# @param rss_url [Object] feed location; stored untouched in @rss_url
def initialize(rss_url)
  @rss_url = rss_url
end
Public Instance Methods
articles()
click to toggle source
# File lib/crawlers/rss.rb, line 14

# Crawls every item of the feed and keeps only the non-empty results.
#
# Each feed item is handed to crawl_article through Parallel.map; results
# that answer true to `empty?` are dropped.
#
# @return [Array] the crawl_article results that are not empty
def articles
  Parallel
    .map(rss_feed_items) { |item| crawl_article(item) }
    .reject(&:empty?)
end
Private Instance Methods
crawl_article(feed_item)
click to toggle source
# File lib/crawlers/rss.rb, line 34

# Downloads the page a feed item links to and extracts its primary content.
#
# @param feed_item [#link] feed entry whose `link` is passed to page_content
# @return whatever extract_primary_content returns for the downloaded page
def crawl_article(feed_item)
  extract_primary_content(page_content(feed_item.link))
end
page_content(page_url)
click to toggle source
# File lib/crawlers/rss.rb, line 39

# Reads the content found at +page_url+ and returns it as a string.
#
# Best-effort by design: any StandardError raised while opening or reading
# is swallowed and an empty string is returned instead.
#
# @param page_url [String] location to read
# @return [String] the content that was read, or '' on failure
def page_content(page_url)
  # Bare `open` only fetches URLs on Rubies where open-uri still patches
  # Kernel#open (removed in Ruby 3.0); URI.open is the supported entry point.
  # The block form closes the handle as soon as the read finishes, instead
  # of leaving it open until garbage collection.
  # NOTE(review): URI.open passes non-URL strings on to Kernel#open, so
  # page_url should only come from trusted sources — confirm against callers.
  URI.open(page_url, &:read)
rescue StandardError
  ''
end
parse_feed(rss_feed)
click to toggle source
# File lib/crawlers/rss.rb, line 28

# Parses raw feed text and returns the feed's items.
#
# @param rss_feed [String] feed document handed to RSS::Parser.parse
# @return [Array] the parsed feed's items; an empty array when the parser
#   returns no feed object, the feed has no items, or RSS::Error is raised
def parse_feed(rss_feed)
  # `&.` covers a nil parse result; `|| []` then keeps the return value an
  # Array on every path, matching what the rescue branch already returns.
  RSS::Parser.parse(rss_feed)&.items || []
rescue RSS::Error
  []
end
rss_feed_items()
click to toggle source
# File lib/crawlers/rss.rb, line 23

# Downloads the feed stored in @rss_url and parses it into items.
#
# @return whatever parse_feed returns for the downloaded feed text
def rss_feed_items
  parse_feed(page_content(@rss_url))
end