class Alexandria::BookProviders::WebsiteBasedProvider
Public Class Methods
new(name, fullname = nil)
click to toggle source
Calls superclass method
Alexandria::BookProviders::AbstractProvider::new
# File lib/alexandria/book_providers/web.rb, line 13
# Builds the provider: hands +name+ and +fullname+ to the superclass
# constructor, then creates the HTMLEntities instance that #html_to_doc
# uses to decode entities.
#
# @param name the provider name, forwarded to the superclass
# @param fullname optional full name (defaults to nil), forwarded as well
def initialize(name, fullname = nil)
  # Bare `super` re-sends the arguments this method received.
  super
  @htmlentities = HTMLEntities.new
end
Public Instance Methods
html_to_doc(html, source_data_charset = "ISO-8859-1")
click to toggle source
# File lib/alexandria/book_providers/web.rb, line 18
# Converts raw HTML into a parsed document.
#
# The bytes of +html+ are interpreted as +source_data_charset+, transcoded
# to UTF-8, passed through @htmlentities.decode, and the result is handed
# to Hpricot().
#
# @param html [String] raw markup; it is not modified by this method
# @param source_data_charset [String] encoding name the bytes of +html+
#   are actually in (defaults to "ISO-8859-1")
# @return the value returned by Hpricot() for the decoded markup
# @raise [EncodingError] propagated from String#encode when the bytes
#   cannot be transcoded from +source_data_charset+ to UTF-8
def html_to_doc(html, source_data_charset = "ISO-8859-1")
  # force_encoding retags its receiver in place, so work on a copy: the
  # caller's string keeps its own encoding and frozen strings are accepted.
  tagged_html = html.dup.force_encoding(source_data_charset)
  utf8_html = tagged_html.encode("utf-8")
  normalized_html = @htmlentities.decode(utf8_html)
  Hpricot(normalized_html)
end
text_of(node)
click to toggle source
Returns the text content of a node and its descendants (from Palatina).
# File lib/alexandria/book_providers/web.rb, line 26
# Recursively gathers the text found at or beneath +node+.
#
# * nil node                    -> nil
# * text node                   -> its #to_html output, unchanged
# * element with nil children   -> nil
# * element with children       -> the children's texts joined together,
#   stripped, with runs of spaces collapsed to a single space
# * anything else               -> nil
#
# @param node an object responding to #text?, #elem?, #to_html and
#   #children (presumably an Hpricot node -- confirm against callers), or nil
# @return [String, nil]
def text_of(node)
  return nil if node.nil?
  return node.to_html if node.text?
  return nil unless node.elem?

  kids = node.children
  return nil if kids.nil?

  # Children that yield nil contribute nothing once joined.
  pieces = kids.map { |child| text_of(child) }
  pieces.join.strip.squeeze(" ")
end