module RegexpExamples::ParseAfterBackslashGroupHelper

A collection of related helper methods, utilised by the `Parser` class

Protected Instance Methods

parse_after_backslash_group() click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 8
# Parses whatever follows a backslash in the regexp source.
#
# Moves @current_position forward by one (past the backslash), then looks at
# rest_of_string / next_char to decide which kind of escape this is and hands
# over to the matching helper. The branch order matters: earlier, more
# specific patterns (numeric and named backreferences) must win over the
# later single-character checks, and anything unrecognised falls through to
# a plain single-character group.
#
# Returns the group object built by the selected helper. The subexpression
# call and anchor helpers raise IllegalSyntaxError instead of returning.
def parse_after_backslash_group
  @current_position += 1
  if rest_of_string =~ /\A(\d{1,3})/
    parse_regular_backreference_group(Regexp.last_match(1))
  elsif rest_of_string =~ /\Ak['<]([\w-]+)['>]/
    parse_named_backreference_group(Regexp.last_match(1))
  elsif CharSets::BackslashCharMap.key?(next_char)
    # key? is a direct lookup; keys.include? built a fresh array per escape
    parse_backslash_special_char
  elsif rest_of_string =~ /\A(c|C-)(.)/
    parse_backslash_control_char(Regexp.last_match(1), Regexp.last_match(2))
  elsif rest_of_string =~ /\Ax(\h{1,2})/
    parse_backslash_escape_sequence(Regexp.last_match(1))
  elsif rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/
    parse_backslash_unicode_sequence(Regexp.last_match(1))
  elsif rest_of_string =~ /\A(p)\{(\^?)([^}]+)\}/i
    # Case-insensitive: the first capture is 'p' or 'P' (the latter negates)
    parse_backslash_named_property(
      Regexp.last_match(1), Regexp.last_match(2), Regexp.last_match(3)
    )
  elsif next_char == 'K' # Keep (special lookbehind that CAN be supported safely!)
    PlaceHolderGroup.new
  elsif next_char == 'R'
    parse_backslash_linebreak
  elsif next_char == 'g'
    parse_backslash_subexpresion_call
  elsif next_char =~ /[bB]/
    parse_backslash_anchor
  elsif next_char =~ /[AG]/
    parse_backslash_start_of_string
  elsif next_char =~ /[zZ]/
    parse_backslash_end_of_string
  else
    # Any other escaped character simply stands for itself
    parse_single_char_group(next_char)
  end
end
parse_backreference_group(group_id) click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 59
# Wraps the given group identifier in a new BackReferenceGroup.
# Both parse_regular_backreference_group and parse_named_backreference_group
# funnel through here, so group_id may be a numeric string, a group name, or
# an Integer computed from a relative reference.
def parse_backreference_group(group_id)
  BackReferenceGroup.new(group_id)
end
parse_backslash_anchor() click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 125
# Handler for the \b and \B escapes (see parse_after_backslash_group).
# These are always rejected: the call below raises IllegalSyntaxError.
def parse_backslash_anchor
  raise_anchors_exception!
end
parse_backslash_control_char(control_syntax, control_code) click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 70
# Handles a control-character escape such as \cA or \C-A.
#
# control_syntax - the matched prefix, "c" or "C-"
# control_code   - the single character following that prefix
#
# Moves @current_position past the prefix, then returns a single-character
# group holding the translated control character.
def parse_backslash_control_char(control_syntax, control_code)
  @current_position += control_syntax.length
  control_char = parse_control_character(control_code)
  parse_single_char_group(control_char)
end
parse_backslash_end_of_string() click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 137
# Handles the \z and \Z end-of-string escapes.
#
# They are only accepted when the escape letter is the very last character
# of the regexp source; anywhere else the anchors exception is raised.
# \z yields an empty placeholder, while \Z yields an optional "\n".
def parse_backslash_end_of_string
  final_index = regexp_string.length - 1
  return raise_anchors_exception! unless @current_position == final_index
  return PlaceHolderGroup.new if next_char == 'z'

  # Otherwise next_char == 'Z'
  QuestionMarkRepeater.new(SingleCharGroup.new("\n", @ignorecase))
end
parse_backslash_escape_sequence(escape_sequence) click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 75
# Handles a hexadecimal escape such as \x4F.
#
# escape_sequence - the hex digits captured after the "x"
#
# Moves @current_position past those digits, then returns a
# single-character group for the character with that code.
def parse_backslash_escape_sequence(escape_sequence)
  @current_position += escape_sequence.length
  decoded_char = parse_unicode_sequence(escape_sequence)
  parse_single_char_group(decoded_char)
end
parse_backslash_linebreak() click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 113
# Handles the \R (generic linebreak) escape by returning a character group
# containing each of the listed linebreak sequences.
def parse_backslash_linebreak
  # Using "\r\n" as one character is little bit hacky...
  linebreaks = ["\r\n", "\n", "\v", "\f", "\r"]
  CharGroup.new(linebreaks, @ignorecase)
end
parse_backslash_named_property(p_negation, caret_negation, property_name) click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 95
# Handles a named property escape such as \p{Alpha}, \P{Alpha} or \p{^Alpha}.
#
# p_negation     - the matched letter, 'p' or 'P' ('P' negates)
# caret_negation - '^' when the property name is prefixed by a caret, else ''
# property_name  - the name between the braces
#
# Moves @current_position past the braces and their content, then returns a
# character group built from the property's code points (surrogates are
# skipped), negated when exactly one of the two negation markers is present.
def parse_backslash_named_property(p_negation, caret_negation, property_name)
  # Consumed: optional '^' (0 or 1) + the name + the two surrounding braces
  @current_position += caret_negation.length + property_name.length + 2

  # Beware of double negatives! E.g. /\P{^Space}/
  is_negative = (p_negation == 'P') != (caret_negation == '^')

  codepoints = RegexpPropertyValues[property_name].matched_codepoints
  characters = codepoints.lazy.filter_map { |codepoint|
    codepoint.chr('utf-8') unless codepoint.between?(0xD800, 0xDFFF)
  }

  CharGroup.new(negate_if(characters, is_negative), @ignorecase)
end
parse_backslash_special_char() click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 63
# Handles escapes listed in CharSets::BackslashCharMap by returning a
# character group for the entry keyed by the current character.
def parse_backslash_special_char
  # Hand the group its own copy rather than the shared map entry
  charset = CharSets::BackslashCharMap[next_char].dup
  CharGroup.new(charset, @ignorecase)
end
parse_backslash_start_of_string() click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 129
# Handles the \A and \G start-of-string escapes.
#
# They are only accepted when @current_position is 1, i.e. the escape opens
# the regexp source; in that case an empty placeholder is returned.
# Anywhere else the anchors exception is raised.
def parse_backslash_start_of_string
  return PlaceHolderGroup.new if @current_position == 1

  raise_anchors_exception!
end
parse_backslash_subexpresion_call() click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 120
# Handler for the \g escape (see parse_after_backslash_group).
# Always raises IllegalSyntaxError; nothing is ever returned.
def parse_backslash_subexpresion_call
  reason = 'Subexpression calls (\\g) cannot be supported, as they are not regular'
  raise IllegalSyntaxError, reason
end
parse_backslash_unicode_sequence(full_hex_sequence) click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 89
# Handles a unicode escape such as \u20AC or \u{41}.
#
# full_hex_sequence - everything captured after the "u": either four hex
#                     digits, or one to four hex digits wrapped in braces
#
# Moves @current_position past the whole capture (braces included), then
# returns a single-character group for the decoded character.
def parse_backslash_unicode_sequence(full_hex_sequence)
  @current_position += full_hex_sequence.length
  # Keep only the hex digits, i.e. strip off "{" and "}" when present
  hex_digits = full_hex_sequence.match(/\h{1,4}/)[0]
  decoded_char = parse_unicode_sequence(hex_digits)
  parse_single_char_group(decoded_char)
end
parse_control_character(char) click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 80
# Translates the character following \c or \C- into the control character
# it denotes.
#
# char - a one-character String
#
# Returns a one-character String.
def parse_control_character(char)
  # "?" is the one exception to the modulo rule below: \c? / \C-? denotes
  # DEL (0x7F), whereas "?".ord % 32 would give 0x1F.
  return 0x7F.chr if char == '?'

  # Otherwise the control code is the character code reduced modulo 32,
  # e.g. "a" (97) and "A" (65) both give 0x01.
  (char.ord % 32).chr
end
parse_named_backreference_group(group_name) click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 48
# Handles a \k<...> / \k'...' backreference.
#
# group_name - the text captured between the delimiters: a group name, an
#              absolute group number, or a negative (relative) number
#
# Moves @current_position forward by the name's length plus two, then
# returns a backreference group. A negative number is resolved against
# @num_groups; anything else is passed through unchanged.
def parse_named_backreference_group(group_name)
  @current_position += group_name.length + 2
  relative_offset = group_name.to_i
  # RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/
  group_id = relative_offset.negative? ? @num_groups + relative_offset + 1 : group_name
  parse_backreference_group(group_id)
end
parse_regular_backreference_group(group_id) click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 43
# Handles a numeric backreference such as \1 or \12.
#
# group_id - the captured digits, as a String
#
# Returns a backreference group for that id.
def parse_regular_backreference_group(group_id)
  # In case of 10+ backrefs: skip every digit beyond the first
  extra_digits = group_id.length - 1
  @current_position += extra_digits
  parse_backreference_group(group_id)
end
parse_unicode_sequence(match) click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 85
# Converts a string of hexadecimal digits into the character with that
# code point.
#
# match - hex digits, e.g. "41" or "20AC"
#
# Returns a one-character String.
def parse_unicode_sequence(match)
  codepoint = match.to_i(16)
  [codepoint].pack('U')
end
raise_anchors_exception!() click to toggle source
# File lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb, line 149
# Raises IllegalSyntaxError for an anchor escape, naming the offending
# character (taken from next_char) in the message. Never returns.
def raise_anchors_exception!
  reason = "Anchors ('#{next_char}') cannot be supported, as they are not regular"
  raise IllegalSyntaxError, reason
end