class WebPageParser::BbcNewsPageParserV6
Public Instance Methods
content()
click to toggle source
# File lib/web-page-parser/parsers/bbc_news_page_parser.rb, line 222
#
# Collects the story text as an array of stripped strings, memoized in
# @content.
#
# Walks the children of 'div.story-body > div.story-body__inner' in
# document order: each <p>, <h2> and <h3> contributes its text, and each
# <ul> whose class attribute matches /story-body/ contributes one entry
# per <li>. All other nodes are ignored.
#
# @return [Array<String>] the collected text fragments
def content
  return @content if @content

  # Assign the ivar up front so the memoized array and the one being
  # filled are the same object.
  fragments = @content = []
  body_nodes = html_doc.css('div.story-body > div.story-body__inner').children
  body_nodes.each do |node|
    tag = node.name
    if %w[p h2 h3].include?(tag)
      fragments << node.text.strip
    elsif tag == 'ul' && node['class'] =~ /story-body/
      # =~ is kept (rather than match?) because the class attribute may be nil.
      node.css('li').each { |item| fragments << item.text.strip }
    end
  end
  fragments
end
date()
click to toggle source
# File lib/web-page-parser/parsers/bbc_news_page_parser.rb, line 243
#
# Returns the publication date read from the page's
# <meta name="OriginalPublicationDate"> tag, memoized in @date once a
# date has been parsed successfully.
#
# @return [DateTime, nil] the parsed date, or nil when the tag is absent
#   or its content attribute cannot be parsed as a date
def date
  return @date if @date

  date_meta = html_doc.at_css('meta[name=OriginalPublicationDate]')
  return unless date_meta

  @date = begin
    DateTime.parse(date_meta['content'])
  rescue ArgumentError, TypeError
    # ArgumentError: the content string is not a parseable date.
    # TypeError: the tag has no content attribute (nil was passed).
    # Anything else is a genuine bug and is allowed to propagate instead
    # of being silently swallowed by a blanket `rescue nil`.
    nil
  end
end
html_doc()
click to toggle source
# File lib/web-page-parser/parsers/bbc_news_page_parser.rb, line 214
#
# Returns the document produced by Nokogiri::HTML(page), building it on
# first use and caching it in @html_document for later calls.
def html_doc
  return @html_document if @html_document

  @html_document = Nokogiri::HTML(page)
end
title()
click to toggle source
# File lib/web-page-parser/parsers/bbc_news_page_parser.rb, line 218
#
# Returns the stripped text of the 'h1.story-body__h1' element(s),
# memoized in @title.
#
# @return [String] the headline text
def title
  return @title if @title

  headline = html_doc.css('h1.story-body__h1')
  @title = headline.text.strip
end