class Crawlers::Rss

Public Class Methods

new(rss_url) click to toggle source
# File lib/crawlers/rss.rb, line 10
# Creates a crawler for one feed.
#
# @param rss_url the feed address; kept as-is in @rss_url and read later by
#   rss_feed_items when the feed is downloaded
def initialize(rss_url)
  @rss_url = rss_url
end

Public Instance Methods

articles() click to toggle source
# File lib/crawlers/rss.rb, line 14
# Crawls every item of the feed and returns the non-empty results.
#
# Each feed item is handed to crawl_article through Parallel.map; results
# that answer true to #empty? are dropped from the returned collection.
#
# @return the crawled article contents, without empty entries
def articles
  Parallel
    .map(rss_feed_items) { |item| crawl_article(item) }
    .reject(&:empty?)
end

Private Instance Methods

crawl_article(feed_item) click to toggle source
# File lib/crawlers/rss.rb, line 34
# Downloads the page a feed item links to and extracts its main content.
#
# @param feed_item an object responding to #link with the article address
# @return whatever extract_primary_content produces for the downloaded page
#   (that helper is defined outside this section -- presumably the article
#   text; confirm against its definition)
def crawl_article(feed_item)
  extract_primary_content(page_content(feed_item.link))
end
page_content(page_url) click to toggle source
# File lib/crawlers/rss.rb, line 39
# Downloads the body of a remote page.
#
# The address is parsed as a URI and fetched through open-uri's URI#open, so
# only schemes open-uri can open are ever read. Kernel#open is deliberately
# avoided: it would execute "| command" strings and read local file paths,
# and article links come from feed data this class does not control.
#
# NOTE(review): relies on open-uri being loaded by this file, as the previous
# Kernel#open-based fetch already required -- confirm the require is present.
#
# @param page_url [String, URI] address of the page to fetch
# @return [String] the page body, or '' when the address is not an openable
#   URI or the fetch fails for any StandardError
def page_content(page_url)
  uri = URI(page_url)
  # Kernel#open is private on every object, so respond_to? is true only for
  # URI classes that open-uri has extended with a public #open.
  return '' unless uri.respond_to?(:open)

  # Block form closes the underlying handle once the body has been read.
  uri.open(&:read)
rescue StandardError
  ''
end
parse_feed(rss_feed) click to toggle source
# File lib/crawlers/rss.rb, line 28
# Parses raw feed text into its list of items.
#
# @param rss_feed [String] the feed document to parse
# @return the parsed feed's items, or [] when the parser raises RSS::Error
#   or yields no feed object
def parse_feed(rss_feed)
  # The safe-navigation call evaluates to nil when the parser hands back nil;
  # fall back to [] so callers always receive something enumerable, matching
  # the rescue branch below.
  RSS::Parser.parse(rss_feed)&.items || []
rescue RSS::Error
  []
end
rss_feed_items() click to toggle source
# File lib/crawlers/rss.rb, line 23
# Downloads the feed stored in @rss_url and returns its parsed items.
#
# @return whatever parse_feed returns for the downloaded feed text
def rss_feed_items
  parse_feed(page_content(@rss_url))
end