module BuntoImport::Util

Public Class Methods

wpautop(pee, br = true) click to toggle source

Ruby translation of wordpress wpautop (see core.trac.wordpress.org/browser/trunk/src/wp-includes/formatting.php)

A group of regex replaces used to identify text formatted with newlines and replace double line-breaks with HTML paragraph tags. The remaining line-breaks after conversion become <<br />> tags, unless $br is set to false

@param string pee The text which has to be formatted. @param bool br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true. @return string Text which has been converted into correct paragraph tags.

# File lib/bunto-import/util.rb, line 14
def self.wpautop(pee, br = true)
  return '' if pee.strip == ''

  allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|noscript|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'
  pre_tags = {}
  pee = pee + "\n"

  if pee.include?('<pre')
    pee_parts = pee.split('</pre>')
    last_pee = pee_parts.pop
    pee = ''
    pee_parts.each_with_index do |pee_part, i|
      start = pee_part.index('<pre')

      unless start
        pee += pee_part
        next
      end

      name = "<pre wp-pre-tag-#{i}></pre>"
      pre_tags[name] = pee_part[start..-1] + '</pre>'

      pee += pee_part[0, start] + name
    end
    pee += last_pee
  end

  pee = pee.gsub(Regexp.new('<br />\s*<br />'), "\n\n")
  pee = pee.gsub(Regexp.new("(<" + allblocks + "[^>]*>)"), "\n\\1")
  pee = pee.gsub(Regexp.new("(</" + allblocks + ">)"), "\\1\n\n")
  pee = pee.gsub("\r\n", "\n").gsub("\r", "\n")
  if pee.include? '<object'
    pee = pee.gsub(Regexp.new('\s*<param([^>]*)>\s*'), "<param\\1>")
    pee = pee.gsub(Regexp.new('\s*</embed>\s*'), '</embed>')
  end

  pees = pee.split(/\n\s*\n/).compact
  pee = ''
  pees.each { |tinkle| pee += '<p>' + tinkle.chomp("\n") + "</p>\n" }
  pee = pee.gsub(Regexp.new('<p>\s*</p>'), '')
  pee = pee.gsub(Regexp.new('<p>([^<]+)</(div|address|form)>'), "<p>\\1</p></\\2>")
  pee = pee.gsub(Regexp.new('<p>\s*(</?' + allblocks + '[^>]*>)\s*</p>'), "\\1")
  pee = pee.gsub(Regexp.new('<p>(<li.+?)</p>'), "\\1")
  pee = pee.gsub(Regexp.new('<p><blockquote([^>]*)>', 'i'), "<blockquote\\1><p>")
  pee = pee.gsub('</blockquote></p>', '</p></blockquote>')
  pee = pee.gsub(Regexp.new('<p>\s*(</?' + allblocks + '[^>]*>)'), "\\1")
  pee = pee.gsub(Regexp.new('(</?' + allblocks + '[^>]*>)\s*</p>'), "\\1")
  if br
    pee = pee.gsub(Regexp.new('<(script|style).*?</\1>')) { |match| match.gsub("\n", "<WPPreserveNewline />") }
    pee = pee.gsub(Regexp.new('(?<!<br />)\s*\n'), "<br />\n")
    pee = pee.gsub('<WPPreserveNewline />', "\n")
  end
  pee = pee.gsub(Regexp.new('(</?' + allblocks + '[^>]*>)\s*<br />'), "\\1")
  pee = pee.gsub(Regexp.new('<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)'), "\\1")
  pee = pee.gsub(Regexp.new('\n</p>$'), '</p>')

  pre_tags.each do |name, value|
    pee.gsub!(name, value)
  end
  pee
end