class HtmlParser

Attributes

descriptions[RW]
doc[RW]
metadata[RW]
summaries[RW]
titles[RW]

Public Class Methods

new(html)
# File lib/html_parser.rb, line 8
# Builds a parser around the given HTML source.
#
# The markup is handed to Nokogiri::HTML.parse and the resulting document is
# kept in @doc.
#
# @param html [Object] the markup passed straight through to
#   Nokogiri::HTML.parse (presumably a String or IO — confirm against callers)
def initialize(html)
  parsed_page = Nokogiri::HTML.parse(html)
  @doc = parsed_page
end

Public Instance Methods

parse()
# File lib/html_parser.rb, line 12
# Queries @doc with one CSS selector per attribute, stores each result in the
# matching instance variable, then delegates to clean_string.
#
# @return whatever clean_string returns
def parse
  # Instance variable => CSS selector, evaluated in this order.
  selector_for = {
    :@titles       => 'dt > h3',
    :@metadata     => 'ul.entry-metadata > li.entry-type > span',
    :@summaries    => 'div.entry-summary > p',
    :@descriptions => 'div.entry-document'
  }

  selector_for.each do |ivar, selector|
    instance_variable_set(ivar, @doc.css(selector))
  end

  clean_string
end

Private Instance Methods

clean_string()
# File lib/html_parser.rb, line 21
# Replaces @titles, @metadata, @summaries and @descriptions with the result of
# mapping each element to a cleaned-up string.
#
# Each element's #text has its newlines removed (not replaced by spaces) and
# its surrounding whitespace stripped. Titles additionally lose a trailing
# parenthesised number such as "(12)" and are stripped once more.
#
# @return the cleaned descriptions (value of the final assignment)
def clean_string
  # Shared normalisation: drop newlines, then trim both ends.
  squash = ->(node) { node.text.gsub(/\n/, '').strip }

  @titles = @titles.map { |node| squash.call(node).gsub(/\(\d+\)\Z/, '').strip }
  @metadata = @metadata.map(&squash)
  @summaries = @summaries.map(&squash)
  @descriptions = @descriptions.map(&squash)
end