module ActionMailer::Text::HtmlToPlainText
Public Class Methods
convert_to_text(html, line_length = 65, _from_charset = 'UTF-8')
click to toggle source
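Returns the text with all HTML tags removed. The charset argument is accepted but not used by this method, so no transcoding is performed here; the result is intended to be UTF-8.
TODO: add support for definition lists (DL) and ordered lists (OL)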
# File lib/actionmailer-text/html_to_plain_text.rb, line 20
#
# Converts an HTML document or fragment to plain text: selected tags are
# rewritten as text (image alt text, "text ( url )" links, underlined
# headings, "* " list items, paragraph and line breaks), every other tag is
# dropped, HTML entities are decoded and whitespace is tidied.
#
# html::          Markup to convert. It is copied first and never modified.
# line_length::   Width handed to custom_word_wrap and upper bound for the
#                 heading rules. Pass nil to skip wrapping and the bound.
# _from_charset:: Not used by this method; kept so existing callers that
#                 pass three arguments keep working.
#
# Returns the converted String with leading/trailing whitespace stripped.
# NOTE(review): the encoding of the result is whatever HTMLEntities#decode
# returns -- presumably UTF-8; confirm against the htmlentities gem.
#
# TODO: add support for DL, OL
def self.convert_to_text(html, line_length = 65, _from_charset = 'UTF-8')
  # Work on a private copy: every step below edits the string in place and
  # the caller's object must stay untouched.
  txt = html.to_s.dup
  he = HTMLEntities.new

  # Ignore things that come outside the body
  txt.gsub!(/.*?(<body.+?\/body>).*?/im, '\1')

  # replace images by their alt attribute; the tag may be self-closed
  # (<img ... />) or not (<img ...>), [^>]* swallows the optional slash
  txt.gsub!(/<img.+?alt=\"([^\"]*)\"[^>]*>/i, '\1')
  txt.gsub!(/<img.+?alt='([^\']*)\'[^>]*>/i, '\1')

  # links: "<a href=URL>text</a>" becomes "text ( URL )"
  txt.gsub!(/<a.+?href=\"([^\"]*)\"[^>]*>(.+?)<\/a>/mi) do |_s|
    "#{Regexp.last_match(2).strip} ( #{Regexp.last_match(1).strip} )"
  end
  txt.gsub!(/<a.+?href='([^\']*)\'[^>]*>(.+?)<\/a>/mi) do |_s|
    "#{Regexp.last_match(2).strip} ( #{Regexp.last_match(1).strip} )"
  end

  # handle headings (H1-H6)
  txt.gsub!(/(<\/h[1-6]>)/i, "\n\\1") # move closing tags to new lines
  txt.gsub!(/[\s]*<h([1-6]+)[^>]*>[\s]*(.*)[\s]*<\/h[1-6]+>/i) do |_s|
    # Capture both groups before any further regexp call replaces the
    # last-match data.
    hlevel = Regexp.last_match(1).to_i
    htext = Regexp.last_match(2)

    htext.gsub!(/<br[\s]*\/?>/i, "\n") # handle <br>s
    htext.gsub!(/<\/?[^>]*>/i, '') # strip tags

    # Rule width = longest heading line, measured on the decoded text
    # because entities are still encoded at this point, and capped at
    # line_length when one is given.
    hlength = htext.each_line.map { |l| he.decode(l).strip.length }.max || 0
    hlength = line_length if line_length && hlength > line_length

    htext =
      case hlevel
      when 1 # H1, asterisks above and below
        ('*' * hlength) + "\n" + htext + "\n" + ('*' * hlength)
      when 2 # H2, dashes above and below
        ('-' * hlength) + "\n" + htext + "\n" + ('-' * hlength)
      else # H3-H6, dashes below
        htext + "\n" + ('-' * hlength)
      end

    "\n\n" + htext + "\n\n"
  end

  # wrap spans
  txt.gsub!(/(<\/span>)[\s]+(<span)/mi, '\1 \2')

  # lists -- TODO: should handle ordered lists
  txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
  # list not followed by a newline
  txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")

  # paragraphs and line breaks
  txt.gsub!(/<\/p>/i, "\n\n")
  txt.gsub!(/<br[\/ ]*>/i, "\n")

  # strip remaining tags
  txt.gsub!(/<\/?[^>]*>/, '')

  # Decode HTML entities only now that all markup is gone: decoding first
  # would turn escaped text such as "&lt;3 ... &gt;" into something that
  # looks like a tag and gets deleted, and a decoded &quot; would break
  # the attribute patterns above.
  txt = he.decode(txt)

  txt = custom_word_wrap(txt, line_length) if line_length

  # remove linefeeds (\r\n and \r -> \n)
  txt.gsub!(/\r\n?/, "\n")

  # strip extra spaces
  txt.gsub!(/\302\240+/, ' ') # non-breaking spaces -> spaces
  txt.gsub!(/\n[ \t]+/, "\n") # space at start of lines
  txt.gsub!(/[ \t]+\n/, "\n") # space at end of lines

  # no more than two consecutive newlines
  txt.gsub!(/[\n]{3,}/, "\n\n")

  # no more than two consecutive spaces
  txt.gsub!(/ {2,}/, ' ')

  txt.strip
end
custom_word_wrap(txt, line_length)
click to toggle source
Taken from Rails’ word_wrap helper (api.rubyonrails.org/classes/ActionView/Helpers/TextHelper.html#method-i-word_wrap)
# File lib/actionmailer-text/html_to_plain_text.rb, line 107
#
# Re-flows +txt+ so that lines longer than +line_length+ are broken at
# whitespace. Adapted from Rails' word_wrap helper.
#
# txt::         Text to wrap; it is split on "\n" and each line is handled
#               on its own. Trailing empty lines are dropped by the split.
# line_length:: Maximum number of characters per output line.
#
# Lines that already fit are returned unchanged. A single word longer than
# +line_length+ is not split. Returns the wrapped lines joined with "\n".
def self.custom_word_wrap(txt, line_length)
  # Built lazily and only once per call: a regexp literal with
  # interpolation is recompiled on every evaluation, so the original paid
  # that cost for each over-long line.
  wrap_pattern = nil

  wrapped = txt.split("\n").map do |line|
    next line unless line.length > line_length

    wrap_pattern ||= /(.{1,#{line_length}})(\s+|$)/
    # Each chunk of up to line_length characters that ends at whitespace
    # (or end of line) gets a newline; strip removes the final one.
    line.gsub(wrap_pattern, "\\1\n").strip
  end

  wrapped.join("\n")
end
Public Instance Methods
convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
click to toggle source
# File lib/actionmailer-text/html_to_plain_text.rb, line 13 def convert_to_text(html, line_length = 65, from_charset = 'UTF-8') HtmlToPlainText.convert_to_text(html, line_length, from_charset) end