class Qwik::TextTokenizer
Constants
- MULTILINE
Public Class Methods
tokenize(str, br_mode=false)
click to toggle source
Tokenizes a text line by line and returns an array of tokens.
# File vendor/qwik/lib/qwik/tokenizer.rb, line 25
#
# Split +str+ into lines and turn each line (or multiline block) into a token.
#
# str     - the text to tokenize. Lines are separated by "\n"; trailing
#           carriage returns are stripped from each line.
# br_mode - forwarded unchanged to +inline+ for every line that is not
#           recognised as one of the block constructs below.
#
# Returns an Array of tokens. Each token is an Array whose first element is
# a Symbol tag:
#
#   [:plugin, cap1, cap2, body]  multiline block opened by MULTILINE[:plugin][0]
#   [:pre, body]                 multiline block opened by MULTILINE[:pre][0]
#   [:html, body]                multiline block opened by MULTILINE[:html][0]
#   [:hr]                        "====", "----" (or longer) or exactly "-- "
#   [:ul, depth, text]           1-3 leading "-"
#   [:ol, depth, text]           1-3 leading "+"
#   [:blockquote, text]          leading ">"
#   [:pre, text]                 leading space or tab
#   [:plugin, cap1, cap2]        single-line "{{...}}" / "{{...(...)}}"
#   [:empty]                     empty line or line starting with "#"
#   [:table, *cells]             leading "," or "|", which is also the separator
#   [:dl, dt, dd]                leading ":"; dt/dd are nil when missing or empty
#   [:hN, text]                  1-5 leading "*" or "!"; N is marker count + 1
#
# Anything else is handed to +inline+, which appends its own token.
def self.tokenize(str, br_mode=false)
  tokens = []
  in_multiline = {}   # tag => true while a multiline block of that tag is open
  scanner = StringScanner.new(str)

  until scanner.eos?
    line = scanner.scan(/.*$/)
    scanner.scan(/\n/)   # consume the line terminator, if there is one
    line.chomp!

    # At the first, check if it is in a multiline block.
    last_token = tokens.last
    if last_token
      last_tag = last_token[0]
      if in_multiline[last_tag]
        if MULTILINE[last_tag][1] =~ line
          # Closing marker: the block ends and the marker itself is dropped.
          in_multiline[last_tag] = nil
        else
          # Body line: accumulate it in the last slot of the open token.
          last_token[-1] += "#{line.chomp}\n"
        end
        next
      end
    end

    # Second chomp is intentional: together with the one above it removes
    # up to two trailing carriage returns, as the original code did.
    line.chomp!

    # preprocess
    #line.gsub!(/&my-([0-9]+);/) {|m| "{{my_char(#{$1})}}" }

    # The order of the branches matters: earlier patterns shadow later ones
    # (e.g. hr must be tested before ul, multiline openers before the rest).
    case line
    when MULTILINE[:plugin][0]
      in_multiline[:plugin] = true
      tokens << [:plugin, $1.to_s, $2.to_s, '']
    when MULTILINE[:pre][0]
      in_multiline[:pre] = true
      tokens << [:pre, '']
    when MULTILINE[:html][0]
      tokens << [:html, '']
      in_multiline[:html] = true
    when /\A====+\z/, '-- ', /\A----+\z/ # hr
      tokens << [:hr]
    when /\A(\-{1,3})(.+)\z/ # ul
      tokens << [:ul, $1.size, $2.to_s.strip]
    when /\A(\+{1,3})(.+)\z/ # ol
      tokens << [:ol, $1.size, $2.to_s.strip]
    when /\A>(.*)\z/ # blockquote
      tokens << [:blockquote, $1.strip]
    when /\A[ \t](.*)\z/ # pre
      tokens << [:pre, $1]
    when /\A\{\{([^\(\)\{\}]+?)(?:\(([^\(\)\{\}]*?)\))?\}\}\z/ # plugin
      tokens << [:plugin, $1.to_s, $2.to_s]
    when '', /\A#/ # '', or comment.
      tokens << [:empty] # empty line
    when /\A([,|])(.*)\z/ # table
      # The first character selects the cell separator for the whole row.
      separator, cells = $1, $2
      tokens << ([:table] + cells.split(separator))
    when /\A:(.*)\z/ # dl
      rest = $1
      dt, dd = rest.split(':', 2)
      # FIXME: Bad hack. When the term contains parentheses, a colon inside
      # them must not be taken as the term/description separator.
      if dt && dt.include?('(') && /\A(.*?\(.*\)[^:]*):(.*)\z/ =~ rest
        dt, dd = $1, $2
      end
      dt = (dt && !dt.empty?) ? dt.strip : nil
      dd = (dd && !dd.empty?) ? dd.strip : nil
      tokens << [:dl, dt, dd]
    when /\A([*!]{1,5})\s*(.+)\s*\z/ # h
      marks = $1
      title = $2.strip
      if title.empty? # '* '
        inline(tokens, line, br_mode)
      else
        tokens << ["h#{marks.size + 1}".intern, title]
      end
    else
      inline(tokens, line, br_mode)
    end
  end

  return tokens
end
Private Class Methods
inline(tokens, line, br_mode)
click to toggle source
# File vendor/qwik/lib/qwik/tokenizer.rb, line 114
#
# Append a [:text, text] token for +line+ to +tokens+.
#
# tokens  - the Array of tokens to append to (modified in place).
# line    - the line of text; it is never modified.
# br_mode - when true, "{{br}}" is appended to lines that do not already
#           end with "~".
#
# A single trailing "~" is replaced by "{{br}}" regardless of +br_mode+.
#
# Returns +tokens+.
def self.inline(tokens, line, br_mode)
  text =
    if line.end_with?('~')
      # Only the last "~" is replaced; any earlier ones are kept as-is.
      "#{line.chomp('~')}{{br}}"
    elsif br_mode
      "#{line}{{br}}"
    else
      line
    end
  tokens << [:text, text]
end