class Qwik::TextTokenizer

Constants

MULTILINE

Public Class Methods

tokenize(str, br_mode=false) click to toggle source

Tokenizes a text line by line and returns an array of tokens.

# File vendor/qwik/lib/qwik/tokenizer.rb, line 25
# Splits a text into a flat list of line-level tokens.
#
# The text is consumed one line at a time. Every token is an Array whose
# first element is a Symbol naming the token kind, followed by that kind's
# payload, for example [:h2, "Title"], [:ul, 1, "item"], [:table, "a", "b"],
# [:dl, term, description], [:hr] or [:empty].
#
# str::     the text to tokenize.
# br_mode:: handed through unchanged to +inline+ for plain text lines.
#
# Returns the Array of tokens.
def self.tokenize(str, br_mode=false)
  tokens = []
  open_block = {}   # tag => true while that multiline block is still open

  scanner = StringScanner.new(str)
  until scanner.eos?
    line = scanner.scan(/.*$/)
    scanner.scan(/\n/)            # step over the line terminator, if present

    line.chomp!

    # Before classifying the line, check whether the most recent token is a
    # multiline block that has not been closed yet.
    current = tokens.last
    tag = current && current[0]
    if open_block[tag]
      if MULTILINE[tag][1] =~ line
        # Closing marker: the block ends and the marker line is discarded.
        open_block[tag] = nil
      else
        # Body line: accumulate it in the last field of the open token.
        current[-1] += "#{line.chomp}\n"
      end
      next
    end

    line.chomp!

    # preprocess
    #line.gsub!(/&my-([0-9]+);/) {|m| "{{my_char(#{$1})}}" }

    # The order of the branches matters: the first pattern that matches wins.
    case line
    when MULTILINE[:plugin][0]                      # multiline plugin opener
      open_block[:plugin] = true
      name  = Regexp.last_match(1).to_s
      param = Regexp.last_match(2).to_s
      tokens << [:plugin, name, param, '']
    when MULTILINE[:pre][0]                         # multiline pre opener
      open_block[:pre] = true
      tokens << [:pre, '']
    when MULTILINE[:html][0]                        # multiline html opener
      open_block[:html] = true
      tokens << [:html, '']
    when /\A====+\z/, '-- ', /\A----+\z/            # horizontal rule
      tokens << [:hr]
    when /\A(\-{1,3})(.+)\z/                        # unordered list item
      depth = Regexp.last_match(1).size
      tokens << [:ul, depth, Regexp.last_match(2).to_s.strip]
    when /\A(\+{1,3})(.+)\z/                        # ordered list item
      depth = Regexp.last_match(1).size
      tokens << [:ol, depth, Regexp.last_match(2).to_s.strip]
    when /\A>(.*)\z/                                # blockquote
      tokens << [:blockquote, Regexp.last_match(1).strip]
    when /\A[ \t](.*)\z/                            # single preformatted line
      tokens << [:pre, Regexp.last_match(1)]
    when /\A\{\{([^\(\)\{\}]+?)(?:\(([^\(\)\{\}]*?)\))?\}\}\z/     # one-line plugin
      name  = Regexp.last_match(1).to_s
      param = Regexp.last_match(2).to_s             # "" when no (...) part was given
      tokens << [:plugin, name, param]
    when '', /\A#/                                  # blank line or comment
      tokens << [:empty]
    when /\A([,|])(.*)\z/                           # table row
      # The leading character (',' or '|') is also the cell separator.
      delimiter = Regexp.last_match(1)
      cells     = Regexp.last_match(2)
      splitter  = Regexp.new(Regexp.quote(delimiter), nil)
      tokens << [:table, *cells.split(splitter).map { |cell| cell.to_s }]
    when /\A:(.*)\z/                                # definition list entry
      body = Regexp.last_match(1)
      term, desc = body.split(':', 2)
      if term && term.include?('(')
        # FIXME: Bad hack. When the term contains parentheses, split at the
        # colon that follows the closing parenthesis instead of the first one.
        paren = /\A(.*?\(.*\)[^:]*):(.*)\z/.match(body)
        term, desc = paren[1], paren[2] if paren
      end
      # Missing or empty parts are represented as nil.
      entry = [term, desc].map { |part| part.to_s.strip if part && !part.empty? }
      tokens << [:dl, *entry]
    when /\A([*!]{1,5})\s*(.+)\s*\z/                # heading
      marks = Regexp.last_match(1)
      title = Regexp.last_match(2).strip
      if title.empty?
        # Only markers and whitespace (e.g. '* '): treat it as plain text.
        inline(tokens, line, br_mode)
      else
        # One marker yields :h2, two yield :h3, and so on.
        tokens << ["h#{marks.size + 1}".intern, title]
      end
    else
      inline(tokens, line, br_mode)
    end
  end

  tokens
end

Private Class Methods

inline(tokens, line, br_mode) click to toggle source
# File vendor/qwik/lib/qwik/tokenizer.rb, line 114
# Appends a [:text, line] token for a plain text line.
#
# A trailing '~' is replaced by '{{br}}'. Otherwise, when +br_mode+ is
# truthy, '{{br}}' is appended to the line. In every other case the line
# is stored as given.
#
# tokens::  the token Array to append to (modified in place).
# line::    the text of the line.
# br_mode:: whether a line without a trailing '~' gets '{{br}}' appended.
#
# Returns +tokens+.
def self.inline(tokens, line, br_mode)
  text =
    if /~\z/ =~ line
      line.sub(/~\z/, '{{br}}')
    elsif br_mode
      "#{line}{{br}}"
    else
      line
    end
  tokens << [:text, text]
end