class HtmlParser
Attributes
descriptions[RW]
doc[RW]
metadata[RW]
summaries[RW]
titles[RW]
Public Class Methods
new(html)
click to toggle source
# File lib/html_parser.rb, line 8
#
# Builds a parser around the given HTML source.
#
# The markup is handed to Nokogiri::HTML.parse and the resulting document
# is stored in @doc.
#
# html - the HTML to parse (passed straight through to Nokogiri).
def initialize(html)
  @doc = Nokogiri::HTML.parse(html)
end
Public Instance Methods
parse()
click to toggle source
# File lib/html_parser.rb, line 12
#
# Runs the CSS queries against the parsed document and stores each node set
# in its instance variable (@titles, @metadata, @summaries, @descriptions),
# then delegates to clean_string.
#
# Returns whatever clean_string returns.
def parse
  page = @doc

  @titles = page.css('dt > h3')
  @metadata = page.css('ul.entry-metadata > li.entry-type > span')
  @summaries = page.css('div.entry-summary > p')
  @descriptions = page.css('div.entry-document')

  clean_string
end
Private Instance Methods
clean_string()
click to toggle source
# File lib/html_parser.rb, line 21
#
# Replaces each collected node list with an array of cleaned-up strings.
#
# Every element's text has its newlines removed and its surrounding
# whitespace stripped. Titles additionally lose a trailing parenthesised
# number such as "(12)", followed by a second strip to drop the whitespace
# that preceded it.
#
# Reads and overwrites @titles, @metadata, @summaries and @descriptions;
# each element only needs to respond to #text.
#
# Returns the cleaned descriptions array (the value of the last assignment).
def clean_string
  @titles = @titles.map do |node|
    # Newlines are already gone here, so \z anchors at the true end of text.
    squash_text(node.text).sub(/\(\d+\)\z/, '').strip
  end
  @metadata = @metadata.map { |node| squash_text(node.text) }
  @summaries = @summaries.map { |node| squash_text(node.text) }
  @descriptions = @descriptions.map { |node| squash_text(node.text) }
end

# Removes every newline from +text+ and strips leading/trailing whitespace.
def squash_text(text)
  text.delete("\n").strip
end