class TextUtils::Sanitizier
Public Class Methods
new( ht )
click to toggle source
# File lib/textutils/sanitizier.rb, line 14
# Creates a sanitizer for the given markup.
#
# @param ht hypertext (html source); stored as-is in @ht
def initialize( ht )
  @ht = ht
end
Public Instance Methods
handle_entities( ht )
click to toggle source
# File lib/textutils/sanitizier.rb, line 34
# Unescapes HTML entities in +ht+ using CGI.unescapeHTML.
#
# NOTE: CGI.unescapeHTML only decodes the basic markup entities
# (&amp; &lt; &gt; &quot; and friends) plus numeric character
# references (&#NNN; / &#xHHH;). Generic named entities such as
# &nbsp; or &copy; are passed through unchanged.
#
# @param ht text that may contain escaped entities
# @return a new string with the supported entities unescaped
def handle_entities( ht )
  CGI.unescapeHTML( ht )
end
tag_regex( tag )
click to toggle source
# File lib/textutils/sanitizier.rb, line 41
# Builds a regex that matches a whole <tag ...>content</tag> element;
# capture group 1 is the content between the opening and closing tag.
#
# - matching is case-insensitive (i) and content may span lines (m)
# - content uses non-greedy .*? so the first closing tag ends the match
# - the tag name must be followed by whitespace, "/" or ">", so that
#   e.g. 'b' does not match <br>, 'u' does not match <ul> and
#   'i' does not match <img>
# - whitespace before the ">" of the closing tag is tolerated
#
# NOTE: +tag+ is interpolated as-is (not escaped), i.e. it is treated
# as a regex fragment; pass trusted tag names only.
#
# @param tag tag name (or regex fragment), e.g. 'p' or 'div'
# @return [Regexp]
def tag_regex( tag )
  /<#{tag}(?=[\s>\/])[^>]*>(.*?)<\/#{tag}\s*>/mi
end
to_plain_text()
click to toggle source
# File lib/textutils/sanitizier.rb, line 18
# Runs the stored hypertext (@ht) through the handler pipeline, feeding
# each handler's return value into the next one, and returns the result
# of the last handler (handle_entities).
def to_plain_text
  ## handle_pre_tags ?? - special rule for preformatted (keep whitespace)
  steps = [
    :handle_ignore_tags,
    :handle_inline_tags,
    :handle_block_tags,
    :handle_other_tags,   # rules for remain/left over tags
    :handle_entities
  ]

  # order matters: every step receives the output of the previous one
  steps.inject( @ht ) { |text, step| send( step, text ) }
end