class Qwik::InlineTokenizer

Constants

SPECIAL
URL

Public Class Methods

regexp_tokenize(str) click to toggle source

Parse a line of wiki text into inline tokens using regular expressions.

# File vendor/qwik/lib/qwik/tokenizer-inline.rb, line 15
# Break a line of wiki text into inline tokens by repeatedly matching
# anchored regular expressions against the front of the remaining text.
#
# str:: the line to tokenize.
#
# Returns an Array holding, in input order:
# * :"'''", :"''" and :'==' for those markers,
# * [:ref, text] for [[text]], and [:ref, "title|url"] for [url title],
# * [:plugin, name] or [:plugin, name, arg] for {{name}} / {{name(arg)}},
# * [:url, href] for text matching the URL pattern,
# * plain Strings for everything else.
def self.regexp_tokenize(str)
  tokens = []

  until str.empty?
    head = str[0]
    tail = str[1, str.length - 1]

    # Every branch yields a pair: the token to emit and the text left over.
    token, str =
      case head

      when ?'                # ''t'' or '''t'''
        if (m = /\A'''(?!:')/.match(str))
          [:"'''", m.post_match]
        elsif (m = /\A''/.match(str))
          [:"''", m.post_match]
        else
          [head.chr, tail]
        end

      when ?=                # ==t==
        if (m = /\A==/.match(str))
          [:'==', m.post_match]
        else
          [head.chr, tail]
        end

      when ?[                # [[t]]
        if (m = /\A\[\[(.+?)\]\]/.match(str))  # [[title|url]] format
          [[:ref, m[1]], m.post_match]
        elsif (m = /\A\[([^\[\]\s]+?) ([^\[\]\s]+?)\]/.match(str))
          # [url title] format, stored with the title first
          [[:ref, m[2] + '|' + m[1]], m.post_match]
        else
          [head.chr, tail]
        end

      when ?{                # {{t}}
        if (m = /\A\{\{([^\(\)]+?)(?:\((.*?)\))?\s*\}\}/.match(str)) # {{t(a)}}
          plugin = [:plugin, m[1]]
          plugin << m[2] if m[2]
          [plugin, m.post_match]
        else
          [head.chr, tail]
        end

      else
        if (m = /\A#{URL}/.match(str))
          [[:url, m[0]], m.post_match]

        elsif (m = /\A[^#{SPECIAL}]+/.match(str))
          chunk = m[0]
          after = m.post_match

          # A URL inside the plain run (preceded by at least one
          # non-alphanumeric character) ends the text token just before it;
          # the URL itself is put back at the front of the remaining text.
          if (inner = /([^a-zA-Z\d]+)(#{URL})/.match(chunk))
            [inner.pre_match + inner[1], inner[2] + inner.post_match + after]
          else
            [chunk, after]
          end

        elsif (m = /\A(.+?)([^#{SPECIAL}])/.match(str))
          # Leading special characters become one text token; the first
          # non-special character is left for the next iteration.
          [m[1], m[2] + m.post_match]

        else
          # Nothing but special characters remain: emit them as-is.
          [str, '']
        end
      end

    tokens << token
  end

  return tokens
end
tokenize(str) click to toggle source
Parse a line of wiki text into inline tokens using the StringScanner class from the strscan library.

def self.strscan_tokenize(str)

# File vendor/qwik/lib/qwik/tokenizer-inline.rb, line 109
# Break a line of wiki text into inline tokens using StringScanner.
#
# str:: the line to tokenize.
#
# Returns an Array holding, in input order:
# * :"'''", :"''" and :'==' for those markers,
# * [:ref, text] for [[text]], and [:ref, "title|url"] for [url title],
# * [:plugin, name] or [:plugin, name, arg] for {{name}} / {{name(arg)}},
# * [:url, href] for text matching the URL pattern,
# * plain Strings for everything else.
#
# StringScanner#pos is a byte offset, so every position adjustment below is
# made in bytes; mixing it with character counts breaks on multibyte text.
def self.tokenize(str)
  line_ar = []

  s = StringScanner.new(str)

  until s.eos?
    if s.scan(/'''(?!:')/)
      line_ar << :"'''"

    elsif s.scan(/''(?!:')/)
      line_ar << :"''"

    elsif s.scan(/==/)
      line_ar << :'=='

    elsif s.scan(/\[\[(.+?)\]\]/)  # [[title|url]] format
      line_ar << [:ref, s[1]]

    elsif s.scan(/\[([^\[\]\s]+?) ([^\[\]\s]+?)\]/) # [url title] format
      # Stored with the title first.
      line_ar << [:ref, s[2] + '|' + s[1]]

    elsif s.scan(/\{\{([^\(\)]+?)(?:\((.*?)\))?\s*\}\}/)   # {{t(a)}}
      ar = [:plugin, s[1]]
      ar << s[2] if s[2]
      line_ar << ar

    elsif s.scan(/#{URL}/)
      line_ar << [:url, s.matched]

    elsif s.scan(/[^#{SPECIAL}]+/)
      m = s.matched

      if /([^a-zA-Z\d]+)(#{URL})/ =~ m
        # A URL sits inside the plain run: emit only the text before it and
        # rewind so that the next iteration starts at the URL.
        ss = $` + $1
        line_ar << ss
        s.unscan
        s.pos += ss.bytesize

      else
        line_ar << m
      end

    elsif s.scan(/(.+?)([^#{SPECIAL}])/)
      # Leading special characters become one text token; rewind so that the
      # first non-special character is handled by the next iteration.
      ss = s[1]
      line_ar << ss
      s.unscan
      s.pos += ss.bytesize

    else
      # Nothing but special characters remain. Use #rest rather than slicing
      # the source string with the byte offset, which returned nil (or the
      # wrong text) when multibyte characters preceded this point.
      line_ar << s.rest
      s.terminate

    end
  end

  line_ar
end