module TypoHero

Constants

AMP_RE
BDQUO
CAPS_INNER_RE
CAPS_RE
DASH_RE
ELLIPSIS
ESCAPE
ESCAPE_RE
EXCLUDED_TAGS
EXCLUDED_TAGS_RE
INITIAL_QUOTES
INITIAL_QUOTE_RE
INLINE_RE
LATEX
LATEX_RE
LDQUO
LEFT_PAREN_RE
LEFT_QUOTES
LEFT_QUOTE_RE
LSQUO
MDASH
MDASH_SPACE
MDASH_SPACE_RE
NBSP
NBSP_THIN
NDASH
NDASH_SPACE
NDASH_SPACE_RE
ORDINAL_RE
PARAGRAPH_RE
PRIMES
PRIME_RE
RDQUO
REPLACE_AMP_RE
RIGHT_QUOTES
RIGHT_QUOTE_RE
RSQUO
SPECIAL
SPECIAL_RE
TOKENIZER_RE
TWO_QUOTES
UNESCAPE
UNESCAPE_RE
UNITS
UNITS_RE
VERSION
WIDONT_INLINE_RE
WIDONT_NBSP_RE
WIDONT_PARAGRAPH_RE

Public Instance Methods

amp(s) click to toggle source
# File lib/typohero.rb, line 344
# Replaces every match of REPLACE_AMP_RE in +s+ with an `&amp;` entity
# wrapped in a <span class="amp"> element.
#
# +s+ is modified in place. Returns +s+ if anything was replaced and +nil+
# otherwise (String#gsub! semantics).
def amp(s)
  wrapped_amp = '<span class="amp">&amp;</span>'
  s.gsub!(REPLACE_AMP_RE, wrapped_amp)
end
caps(s) click to toggle source
# File lib/typohero.rb, line 348
# Wraps the first capture group of every CAPS_RE match in +s+ with a
# <span class="caps"> element.
#
# +s+ is modified in place. Returns +s+ if anything was replaced and +nil+
# otherwise (String#gsub! semantics).
def caps(s)
  wrapped_caps = '<span class="caps">\1</span>'
  s.gsub!(CAPS_RE, wrapped_caps)
end
dash_spaces(s) click to toggle source
# File lib/typohero.rb, line 339
# Applies the two dash-spacing substitutions to +s+ in place: first
# MDASH_SPACE_RE -> MDASH_SPACE, then NDASH_SPACE_RE -> NDASH_SPACE.
#
# Returns the result of the second (n-dash) substitution, i.e. +s+ if that
# pass changed something and +nil+ otherwise.
def dash_spaces(s)
  substitutions = [
    [MDASH_SPACE_RE, MDASH_SPACE],
    [NDASH_SPACE_RE, NDASH_SPACE]
  ]
  outcome = nil
  substitutions.each do |pattern, replacement|
    outcome = s.gsub!(pattern, replacement)
  end
  outcome
end
decode(s) click to toggle source
# File lib/typohero.rb, line 316
# Decodes numeric character references (`&#NNN;` decimal, `&#xHHH;` hex,
# case-insensitive) in +s+ to the corresponding UTF-8 characters, in place.
#
# * Code point 38 (`&`) is emitted as the entity `&amp;` instead of a raw
#   ampersand.
# * References that do not denote an encodable code point (surrogates,
#   values above U+10FFFF, arbitrarily large numbers) are left exactly as
#   written instead of raising RangeError.
#
# NOTE(review): every other code point, including markup-significant ones
# such as `<`, is emitted as a raw character - confirm that later stages
# expect this.
#
# Returns +s+ if at least one reference was found and +nil+ otherwise
# (String#gsub! semantics).
def decode(s)
  s.gsub!(/&#x([0-9A-F]+);|&#([0-9]+);/i) do |entity|
    hex, dec = $1, $2
    code = hex ? hex.to_i(16) : dec.to_i(10)
    next '&amp;' if code == 38

    begin
      code.chr('UTF-8')
    rescue RangeError
      # Not a valid Unicode scalar value: keep the reference untouched.
      entity
    end
  end
end
enhance(input) click to toggle source
# File lib/typohero.rb, line 252
# Runs the full enhancement pipeline over +input+ and returns the joined
# result, passed through html_safe(input, ...).
#
# Pass 1 (while tokenizing): every :text token is rewritten in place by
# decode, escape, units, primes, special, latex, quotes and dash_spaces, in
# that order. quotes additionally receives the last character the previous
# :text token had *before* it was rewritten.
#
# widont then runs over all tokens, and pass 2 applies caps, initial_quotes,
# amp, ordinals, nobr and unescape to the :text tokens. The strings in +text+
# are the same objects as in +tokens+, so pass 2 edits show up in the join.
def enhance(input)
  tokens = []
  text = []
  prev_last_char = nil

  tokenize(input) do |s, type|
    tokens << s
    next unless type == :text

    # Remember the trailing character before any rewriting happens.
    trailing_char = s[-1]
    decode(s)
    escape(s)
    units(s)
    primes(s)
    special(s)
    latex(s)
    quotes(s, prev_last_char)
    dash_spaces(s)
    prev_last_char = trailing_char
    text << s
  end

  widont(tokens)

  text.each do |fragment|
    caps(fragment)
    initial_quotes(fragment)
    amp(fragment)
    ordinals(fragment)
    nobr(fragment)
    unescape(fragment)
  end

  html_safe(input, tokens.join)
end
escape(s) click to toggle source
# File lib/typohero.rb, line 323
# Applies the ESCAPE_RE -> ESCAPE substitution to +s+ in place.
#
# Returns +s+ if anything was replaced and +nil+ otherwise
# (String#gsub! semantics).
def escape(s)
  pattern = ESCAPE_RE
  replacement = ESCAPE
  s.gsub!(pattern, replacement)
end
html_safe(src, dst) click to toggle source
# File lib/typohero.rb, line 312
# Carries the "HTML safe" marking of +src+ over to +dst+.
#
# When +src+ responds to +html_safe?+ and answers truthy, the result of
# +dst.html_safe+ is returned; in every other case +dst+ is returned as is.
def html_safe(src, dst)
  return dst unless src.respond_to?(:html_safe?)
  return dst unless src.html_safe?

  dst.html_safe
end
initial_quotes(s) click to toggle source
# File lib/typohero.rb, line 352
# Applies the INITIAL_QUOTE_RE -> INITIAL_QUOTES substitution to +s+ in
# place.
#
# Returns +s+ if anything was replaced and +nil+ otherwise
# (String#gsub! semantics).
def initial_quotes(s)
  pattern = INITIAL_QUOTE_RE
  replacement = INITIAL_QUOTES
  s.gsub!(pattern, replacement)
end
latex(s) click to toggle source
# File lib/typohero.rb, line 335
# Applies the LATEX_RE -> LATEX substitution to +s+ in place.
#
# Returns +s+ if anything was replaced and +nil+ otherwise
# (String#gsub! semantics).
def latex(s)
  pattern = LATEX_RE
  replacement = LATEX
  s.gsub!(pattern, replacement)
end
nobr(s) click to toggle source
# File lib/typohero.rb, line 356
# Wraps every hyphenated run of word characters (two or more word-character
# groups joined by single hyphens) in a <span class="nobr"> element.
#
# +s+ is modified in place. Returns +s+ if anything was wrapped and +nil+
# otherwise (String#gsub! semantics).
def nobr(s)
  hyphenated_word = /[\p{Word}]+(-[\p{Word}]+)+/
  wrapped = '<span class="nobr">\0</span>'
  s.gsub!(hyphenated_word, wrapped)
end
ordinals(s) click to toggle source
# File lib/typohero.rb, line 365
# Wraps the first capture group of every ORDINAL_RE match in +s+ with a
# <span class="ord"> element.
#
# +s+ is modified in place. Returns +s+ if anything was replaced and +nil+
# otherwise (String#gsub! semantics).
def ordinals(s)
  wrapped_suffix = '<span class="ord">\1</span>'
  s.gsub!(ORDINAL_RE, wrapped_suffix)
end
primes(s) click to toggle source
# File lib/typohero.rb, line 360
# Applies the PRIME_RE -> PRIMES substitution to +s+ in place. Per the
# original author's note this is the special case for inches and for
# minutes/seconds marks.
#
# Returns +s+ if anything was replaced and +nil+ otherwise
# (String#gsub! semantics).
def primes(s)
  pattern = PRIME_RE
  replacement = PRIMES
  s.gsub!(pattern, replacement)
end
quotes(s, prev_last_char) click to toggle source
# File lib/typohero.rb, line 369
# Converts straight quotes in +s+ to their typographic replacements, in
# place.
#
# s              - the text token to rewrite.
# prev_last_char - last character of the previous text token (or nil); it
#                  is only consulted when +s+ is nothing but a single quote
#                  character.
#
# A token that consists of one quote character, optionally followed by a
# single newline, has no context of its own: it becomes a right quote when
# +prev_last_char+ is a non-space character and a left quote otherwise. The
# lookup uses only the quote character and the newline is preserved; the
# previous `\Z` anchor let the newline leak into the table lookup, which
# yielded nil and made String#replace raise TypeError.
#
# Returns nil for the single-quote case, otherwise the result of the final
# gsub! (the string, or nil when that pass changed nothing).
def quotes(s, prev_last_char)
  if s =~ /\A(['"])(\n?)\z/
    quote, newline = $1, $2
    table = prev_last_char =~ /\P{Space}/ ? RIGHT_QUOTES : LEFT_QUOTES
    s.replace("#{table[quote]}#{newline}")
    return
  end

  # Special case for double sets of quotes, e.g.
  #   <p>He said, "'Quoted' words in a larger quote."</p>
  s.gsub!(/(?:"'|'")(?=\p{Word})/, TWO_QUOTES)
  s.gsub!(RIGHT_QUOTE_RE, RIGHT_QUOTES)
  # Whatever straight quotes remain are treated as opening quotes.
  s.gsub!(/['"]/,         LEFT_QUOTES)
end
special(s) click to toggle source
# File lib/typohero.rb, line 331
# Applies the SPECIAL_RE -> SPECIAL substitution to +s+ in place.
#
# Returns +s+ if anything was replaced and +nil+ otherwise
# (String#gsub! semantics).
def special(s)
  pattern = SPECIAL_RE
  replacement = SPECIAL
  s.gsub!(pattern, replacement)
end
strip_tags(input) click to toggle source
# File lib/typohero.rb, line 246
# Returns the concatenation of all :text and :latex tokens of +input+, i.e.
# the input with every other token kind dropped. The result is passed
# through html_safe(input, ...).
def strip_tags(input)
  kept = ''
  tokenize(input) do |s, type|
    next unless type == :text || type == :latex

    kept << s
  end
  html_safe(input, kept)
end
tokenize(input) { |s, type| ... } click to toggle source
# File lib/typohero.rb, line 137
# Splits +input+ with TOKENIZER_RE and yields every piece together with a
# classification symbol: :comment, :cdata, :tag, :excluded, :latex or :text.
#
# Three counters carry state from one token to the next:
# * +excluded+ - nesting depth of tags matched by EXCLUDED_TAGS_RE
# * +latex+    - nesting depth of `\(` / `\[` delimiters
# * +dollar+   - number of `$$` delimiters seen so far (odd = inside a span)
def tokenize(input)
  excluded, latex, dollar = 0, 0, 0
  input.scan TOKENIZER_RE do |s|
    # Comments and CDATA sections are recognised by their prefix alone,
    # independently of the counters.
    type =
      if s =~ /\A<!--/
        :comment
      elsif s =~ /\A<!\[/
        :cdata
      end

    # Tags are only interpreted outside of LaTeX spans.
    if !type && latex == 0 && dollar.even?
      if s=~ /\A</
        if s =~ EXCLUDED_TAGS_RE
          # A truthy first capture decrements the depth, anything else
          # increments it (presumably $1 captures the closing slash - confirm
          # against EXCLUDED_TAGS_RE). The depth is clamped at zero.
          excluded += $1 ? -1 : 1
          excluded = 0 if excluded < 0
          type = :excluded
        else
          # Any other tag is :excluded while inside an excluded element.
          type = excluded == 0 ? :tag : :excluded
        end
      end
    end

    # LaTeX delimiters are only interpreted outside of excluded elements.
    if !type && excluded == 0
      case s
      when /\A\\[\(\[]\Z/
        latex += 1
        type = :latex
      when /\A\\[\)\]]\Z/
        # Unbalanced closers do not push the depth below zero.
        latex -= 1 if latex > 0
        type = :latex
      when '$$'
        dollar += 1
        type = :latex
      end
    end

    # Everything not classified above inherits the surrounding context.
    type ||=
      if excluded != 0
        :excluded
      elsif latex != 0 || dollar.odd?
        :latex
      else
        :text
      end

    yield(s, type)
  end
end
tokenize_with_tags(input) { |s, type, tags| ... } click to toggle source
# File lib/typohero.rb, line 186
# Like tokenize, but additionally yields the stack of currently open tag
# names as a third block argument.
#
# For every :tag token the tag name is read from the token. An opening tag
# pushes its name; a closing tag pops names until the matching one has been
# removed or the stack is empty. The same array object is yielded each time,
# so callers see (and may mutate) the live stack.
def tokenize_with_tags(input)
  open_tags = []
  tokenize(input) do |s, type|
    if type == :tag && (tag = /\A<(\/)?([^\p{Space}\/>]+)/.match(s))
      closing = tag[1]
      name = tag[2]
      if closing
        # Unwind up to and including the matching opening tag.
        loop do
          break if open_tags.empty? || open_tags.pop == name
        end
      else
        open_tags << name
      end
    end
    yield(s, type, open_tags)
  end
end
truncate(input, *max_words_or_separator) click to toggle source
# File lib/typohero.rb, line 200
# Truncates +input+ after a number of words and/or at a separator, closing
# any tags that are still open at the cut.
#
# input                  - the string to truncate.
# max_words_or_separator - optional arguments in any order: the first
#                          Integer is the maximum number of words, the first
#                          non-Integer is the separator (a Regexp, or
#                          anything Regexp.union accepts). A separator that
#                          does not occur in +input+ is ignored.
#
# When a cut happens, trailing whitespace/punctuation of the kept text is
# replaced by ELLIPSIS and closing tags for all open elements are appended.
# The result is passed through html_safe(input, ...).
#
# Uses Integer for the type test: Fixnum was deprecated in Ruby 2.4 and
# removed in 3.2, where referencing it raises NameError.
def truncate(input, *max_words_or_separator)
  max_words = max_words_or_separator.find { |arg| arg.is_a?(Integer) }
  separator = max_words_or_separator.find { |arg| !arg.is_a?(Integer) }
  if separator
    separator = Regexp.union(separator) unless separator.is_a?(Regexp)
    separator = nil unless input =~ separator
  end

  out, tail, truncated = +'', +'', false
  tokenize_with_tags(input) do |s, type, tags|
    if separator && %i[comment text latex tag].include?(type) && separator === s
      # Keep the text in front of the separator (pre-match of `separator === s`).
      out << $` if type == :text
      if type == :tag
        if s =~ /\A<\//
          # The separator is a closing tag: emit it as is.
          tail << s
        else
          # The separator is an opening tag: it must not be closed again.
          tags.pop
        end
      end
      truncated = tags
      break
    elsif max_words == 0
      # Word budget exhausted: stop at the next text token, but keep the
      # non-text tokens seen until then.
      if type == :text
        truncated = tags
        break
      end
      tail << s
    else
      if max_words && type == :text
        s =~ /\A(\p{Space}*)(.*)\Z/m
        ws, w = $1, $2.split(/\p{Space}+/)
        if w.size > max_words
          out << ws << w[0...max_words].join(' ')
          truncated = tags
          break
        end
        max_words -= w.size
      end
      out << s
    end
  end

  if truncated
    out.sub!(/[\p{Space}\p{Punct}]*\Z/, ELLIPSIS)
    # Close the still-open elements, innermost first.
    tail << "</#{truncated.pop}>" until truncated.empty?
  end
  html_safe(input, out << tail)
end
unescape(s) click to toggle source
# File lib/typohero.rb, line 327
# Applies the UNESCAPE_RE -> UNESCAPE substitution to +s+ in place.
#
# Returns +s+ if anything was replaced and +nil+ otherwise
# (String#gsub! semantics).
def unescape(s)
  pattern = UNESCAPE_RE
  replacement = UNESCAPE
  s.gsub!(pattern, replacement)
end
units(s) click to toggle source
# File lib/typohero.rb, line 382
# Applies the UNITS_RE -> UNITS substitution to +s+ in place.
#
# Returns +s+ if anything was replaced and +nil+ otherwise
# (String#gsub! semantics).
def units(s)
  pattern = UNITS_RE
  replacement = UNITS
  s.gsub!(pattern, replacement)
end
widont(tokens) click to toggle source
# File lib/typohero.rb, line 282
# Walks +tokens+ from the last to the first and replaces the whitespace in
# front of the final word of a run of text with NBSP, mutating the token
# strings in place.
#
# +state+ drives the scan:
# * 0 - done for now; nothing happens until a token matches
#       WIDONT_PARAGRAPH_RE again
# * 1 - looking for the final word (initial state, and the state after every
#       WIDONT_PARAGRAPH_RE match)
# * 2 - the whitespace before the final word sits at the start of +widow+,
#       but the word in front of it has not been seen yet
# * 3 - a trailing non-space run was seen without whitespace in front of it
#       in the same token
def widont(tokens)
  state, i, widow = 1, tokens.size - 1, nil
  while i >= 0
    if tokens[i] =~ WIDONT_PARAGRAPH_RE
      state = 1
    elsif tokens[i] !~ WIDONT_INLINE_RE
      # Tokens matching WIDONT_INLINE_RE are skipped without changing state.
      if tokens[i] =~ WIDONT_NBSP_RE
        state = 0
      elsif state == 1 || state == 3
        # State 1 requires a trailing non-space run (the final word); state 3
        # has already seen it and requires whitespace instead.
        if tokens[i] =~ (state == 1 ? /(\P{Space}+)?(\p{Space}+)?(\P{Space}+\p{Space}*)\Z/m :
                                      /(\P{Space}+)?(\p{Space}+)(\P{Space}*)\Z/m)
          if $1 && $2
            # Previous word, whitespace and the tail are all in this token:
            # swap the whitespace for NBSP and keep everything else.
            tokens[i].replace "#{$`}#{$1}#{NBSP}#{$3}"
            state = 0
          elsif $2
            # Whitespace found but no word in front of it within this token;
            # remember the token until an earlier one supplies the word.
            state = 2
            widow = tokens[i]
          else
            state = 3
          end
        end
      elsif state == 2 && tokens[i] =~ /(\P{Space}+\p{Space}*)\Z/m
        # An earlier token ends with a word: replace the leading whitespace
        # of the remembered token with NBSP.
        widow.sub!(/\A\p{Space}*/, NBSP)
        state = 0
      end
    end
    i -= 1
  end
end