class RegexpExamples::ChargroupParser

A “sub-parser” for char groups in a regular expression. Some examples of what this class needs to parse:

[abc]
  • plain characters

[a-z]
  • ranges

[\n\b\d]
  • escaped characters (which may represent character sets)

[^abc]
  • negated group

[[a][bc]]
  • sub-groups (should match “a”, “b” or “c”)

[[:lower:]]
  • POSIX group

[[a-f]&&[d-z]]
  • set intersection (should match “d”, “e” or “f”)

[[^:alpha:]&&[\n]a-c]
  • all of the above!!!! (should match “\n”)

Attributes

current_position[R]
length[R]
regexp_string[R]

Public Class Methods

new(regexp_string, is_sub_group: false) click to toggle source
# File lib/regexp-examples/chargroup_parser.rb, line 24
# Prepares a parser for the char-group text in +regexp_string+.
#
# regexp_string:: the text to be parsed; reading starts at index 0.
# is_sub_group::  stored as-is; parse_first_chars only hands off to
#                 parse_posix_group when this flag is truthy.
#
# The parser starts with an empty charset and with negation switched off.
def initialize(regexp_string, is_sub_group: false)
  @current_position = 0
  @charset = []
  @is_sub_group = is_sub_group
  @regexp_string = regexp_string
  @negative = false
end

Public Instance Methods

parse() click to toggle source
# File lib/regexp-examples/chargroup_parser.rb, line 32
# Reads the char group from the current position up to and including its
# closing "]", accumulating the characters into @charset.
#
# After parse_first_chars has dealt with any special leading characters,
# each remaining character is dispatched on:
# "["::      handed to parse_sub_group_concat
# "-"::      handed to parse_after_hyphen
# "&"::      handed to parse_after_ampersand
# otherwise:: read via parse_checking_backlash and appended to @charset
#
# Returns the new value of @current_position (one past the closing "]").
#
# Raises ArgumentError if the end of the input is reached before a closing
# "]" is found. Without this check the loop would never terminate, because
# next_char keeps returning nil past the end of the string.
def parse
  parse_first_chars
  until next_char == ']'
    case next_char
    when nil
      raise ArgumentError, "unterminated char group: #{regexp_string.inspect}"
    when '['
      parse_sub_group_concat
    when '-'
      parse_after_hyphen
    when '&'
      parse_after_ampersand
    else
      @charset.concat parse_checking_backlash
    end
  end

  # uniq! returns nil when nothing was removed, so its result is not used.
  @charset.uniq!
  @current_position += 1 # To account for final "]"
end
result() click to toggle source
# File lib/regexp-examples/chargroup_parser.rb, line 51
# Returns the accumulated charset after passing it, together with the
# negation flag, through negate_if (defined outside this view).
def result
  chars = @charset
  negated = @negative
  negate_if(chars, negated)
end

Private Instance Methods

next_char() click to toggle source
# File lib/regexp-examples/chargroup_parser.rb, line 149
# Returns the character at the current position, without advancing.
# Yields nil once the position is past the end of the string.
def next_char
  regexp_string.slice(@current_position)
end
parse_after_ampersand() click to toggle source
# File lib/regexp-examples/chargroup_parser.rb, line 113
# Called when the current character is "&".
#
# A doubled "&&" is treated as an intersection and delegated to
# parse_sub_group_intersect; a lone "&" is an ordinary character, which is
# added to the charset before stepping past it.
def parse_after_ampersand
  return parse_sub_group_intersect if regexp_string[@current_position + 1] == '&'

  @charset << '&'
  @current_position += 1
end
parse_after_backslash() click to toggle source
# File lib/regexp-examples/chargroup_parser.rb, line 91
# Interprets the character(s) following a backslash (the backslash itself
# has already been stepped over by the caller). Always returns an Array.
#
# * "b" yields a one-element array holding the backspace character.
# * "u" followed by four hex digits, or by one to four hex digits in
#   braces, is delegated to parse_backslash_unicode_sequence (defined
#   outside this view) after stepping over the "u".
# * Anything else is looked up in CharSets::BackslashCharMap, falling back
#   to the character itself when there is no entry.
def parse_after_backslash
  escaped = next_char

  if escaped == 'b'
    @current_position += 1
    return ["\b"]
  end

  if rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/
    @current_position += 1
    return parse_backslash_unicode_sequence(Regexp.last_match(1)).result.map(&:to_s)
  end

  mapped = CharSets::BackslashCharMap.fetch(escaped, [escaped])
  @current_position += 1
  mapped
end
parse_after_hyphen() click to toggle source
# File lib/regexp-examples/chargroup_parser.rb, line 130
# Called when the current character is "-". Returns @charset.
#
# * A hyphen directly before "]" (e.g. /[abc-]/) is not a range: the
#   literal "-" is added to the charset.
# * Otherwise the hyphen forms a range starting at the last character in
#   the charset. The end of the range is either a \u escape (decoded by
#   parse_backslash_unicode_sequence, defined outside this view) or
#   whatever parse_checking_backlash reads next. Every character in the
#   range is appended to the charset.
def parse_after_hyphen
  if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range!
    @current_position += 1
    return @charset << '-'
  end

  if rest_of_string =~ /\A-\\u(\h{4}|\{\h{1,4}\})/
    @current_position += 3 # step over the hyphen, the backslash and the "u"
    range_end = parse_backslash_unicode_sequence(Regexp.last_match(1)).result.first.to_s
    range_start = @charset.last
  else
    @current_position += 1 # step over the hyphen
    range_start = @charset.last
    range_end = parse_checking_backlash.first
  end

  @charset.concat((range_start..range_end).to_a)
end
parse_checking_backlash() click to toggle source

Always returns an Array, for consistency

# File lib/regexp-examples/chargroup_parser.rb, line 80
# Consumes one character and returns what it stands for, always as an Array.
#
# A backslash hands control to parse_after_backslash (which reads what
# follows it); any other character is returned as a one-element array.
def parse_checking_backlash
  current = next_char
  @current_position += 1
  current == '\\' ? parse_after_backslash : [current]
end
parse_first_chars() click to toggle source
# File lib/regexp-examples/chargroup_parser.rb, line 57
# Handles the characters that are only special at the start of a group.
#
# 1. A leading "^" marks the group as negated and is stepped over.
# 2. If the next character is "-" or "]" it is taken literally,
#    e.g. /[]]/ (match "]") or /[-]/ (match "-").
# 3. Otherwise, if the text looks like ":name:]" or ":^name:]" and this
#    parser is a sub-group (e.g. [[:alpha:]]), it is parsed as a POSIX
#    group. Outside a sub-group nothing is consumed for this form.
def parse_first_chars
  if next_char == '^'
    @current_position += 1
    @negative = true
  end

  remainder = rest_of_string
  if remainder =~ /\A[-\]]/
    @charset << next_char
    @current_position += 1
  elsif @is_sub_group && remainder =~ /\A:(\^?)([^:]+):\]/
    parse_posix_group(Regexp.last_match(1), Regexp.last_match(2))
  end
end
parse_posix_group(negation_flag, name) click to toggle source
# File lib/regexp-examples/chargroup_parser.rb, line 72
# Adds the characters of the POSIX group +name+ to the charset and steps
# over the group's text.
#
# negation_flag:: "" or "^"; a non-empty flag asks negate_if (defined
#                 outside this view) to negate the looked-up set.
# name::          key into CharSets::POSIXCharMap.
#
# The position advances over the flag (0 or 1 characters), the name, and
# the opening and closing colons (always 2).
def parse_posix_group(negation_flag, name)
  negated = !negation_flag.empty?
  members = negate_if(CharSets::POSIXCharMap[name], negated)
  @charset.concat(members)

  colons = 2
  @current_position += negation_flag.length + name.length + colons
end
parse_sub_group_concat() click to toggle source
# File lib/regexp-examples/chargroup_parser.rb, line 105
# Parses a nested group and adds its characters to this charset.
#
# Steps over the opening "[", runs a fresh parser of the same class (in
# sub-group mode) over the remaining text, appends that parser's result,
# and then advances by the length the nested parser reports.
def parse_sub_group_concat
  @current_position += 1
  nested = self.class.new(rest_of_string, is_sub_group: true).tap(&:parse)
  @charset.concat(nested.result)
  @current_position += nested.length
end
parse_sub_group_intersect() click to toggle source
# File lib/regexp-examples/chargroup_parser.rb, line 122
# Handles "&&": keeps only those characters of the charset that also occur
# in whatever follows the operator.
#
# Steps over the two ampersands, runs a fresh parser of the same class (in
# sub-group mode) over the remaining text and intersects this charset with
# its result. The position then advances by one less than the nested
# parser's length -- presumably so that the "]" the nested parser stopped
# on is still seen by this parser (NOTE(review): confirm).
def parse_sub_group_intersect
  @current_position += 2
  nested = self.class.new(rest_of_string, is_sub_group: true).tap(&:parse)
  @charset = @charset & nested.result
  @current_position += (nested.length - 1)
end
rest_of_string() click to toggle source
# File lib/regexp-examples/chargroup_parser.rb, line 145
# Returns the not-yet-consumed tail of the string: everything from the
# current position through the final character.
def rest_of_string
  unread = (@current_position..-1)
  regexp_string.slice(unread)
end