class Alexandria::BookProviders::WebsiteBasedProvider

Public Class Methods

new(name, fullname = nil) click to toggle source
# File lib/alexandria/book_providers/web.rb, line 13
# Builds the provider and prepares the HTML entity decoder that
# html_to_doc relies on.
#
# name     - forwarded unchanged to the superclass constructor.
# fullname - forwarded unchanged to the superclass constructor
#            (defaults to nil).
def initialize(name, fullname = nil)
  # A bare `super` passes this method's own arguments (name, fullname) along.
  super
  @htmlentities = HTMLEntities.new
end

Public Instance Methods

html_to_doc(html, source_data_charset = "ISO-8859-1") click to toggle source
# File lib/alexandria/book_providers/web.rb, line 18
# Converts raw HTML bytes into a parsed document.
#
# The input is interpreted as being in +source_data_charset+, transcoded to
# UTF-8, run through the HTML entity decoder, and handed to Hpricot().
#
# html                - String holding the raw page data. It is NOT modified:
#                       the encoding is re-tagged on a copy, so frozen
#                       strings and strings the caller keeps using are safe.
# source_data_charset - name of the encoding the bytes of +html+ are in
#                       (defaults to "ISO-8859-1").
#
# Returns whatever Hpricot() returns for the normalized markup.
# Raises the usual String#encode errors when the bytes cannot be converted
# from +source_data_charset+ to UTF-8.
def html_to_doc(html, source_data_charset = "ISO-8859-1")
  # force_encoding mutates its receiver, so work on a duplicate instead of
  # changing (or failing on a frozen) caller-owned string.
  tagged_html = html.dup.force_encoding(source_data_charset)
  utf8_html = tagged_html.encode("utf-8")
  # NOTE(review): entities are decoded before parsing, so escaped markup
  # characters presumably reach the parser unescaped; confirm this is intended.
  normalized_html = @htmlentities.decode(utf8_html)
  Hpricot(normalized_html)
end
text_of(node) click to toggle source

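Returns the text contained in a node, with surrounding whitespace stripped and runs of spaces collapsed (originally from Palatina).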

# File lib/alexandria/book_providers/web.rb, line 26
# Recursively collects the text held by a parsed node.
#
# node - a parsed document node responding to text?, elem?, to_html and
#        children, or nil.
#
# Returns:
# * nil when +node+ is nil, is neither a text node nor an element, or is an
#   element whose children are nil;
# * node.to_html, untouched, for a text node;
# * for an element, the concatenated text of all children with leading and
#   trailing whitespace stripped and runs of spaces squeezed to one.
def text_of(node)
  return nil if node.nil?
  return node.to_html if node.text?
  return nil unless node.elem?

  descendants = node.children
  return nil if descendants.nil?

  # Children that yield nil contribute an empty string to the join.
  combined = descendants.map { |child| text_of(child) }.join
  combined.strip.squeeze(" ")
end