class WebPageParser::GuardianPageParserV3

Public Instance Methods

content() click to toggle source
# File lib/web-page-parser/parsers/guardian_page_parser.rb, line 100
# Extracts the article's text blocks from the parsed page.
#
# Looks at every descendant of `div#article-body-blocks` or
# `div[itemprop=articleBody]` and keeps the `p`, `h2`, `h3` and `ul`
# elements. A `ul` contributes one entry per `li`; every other kept
# element contributes its own text. Each entry is stripped of surrounding
# whitespace, and entries that are empty after stripping are dropped.
#
# The result is cached in @content, so the document is only queried on the
# first call.
#
# NOTE(review): html_doc is defined outside this method; presumably it is
# the Nokogiri document for the page -- confirm against the parser class.
#
# @return [Array<String>] the non-blank text blocks of the article
def content
  return @content if @content

  blocks = html_doc.css('div#article-body-blocks *, div[itemprop=articleBody] *').select do |node|
    %w[p h2 h3 ul].include?(node.name)
  end

  texts = blocks.flat_map do |node|
    node.name == 'ul' ? node.css('li').map(&:text) : [node.text]
  end

  # Strip before testing for emptiness so whitespace-only blocks are
  # discarded instead of surfacing as "" entries.
  @content = texts.map(&:strip).reject(&:empty?)
end