class SrlRuby::ASTBuilder

The purpose of an ASTBuilder is to build, piece by piece, an AST (Abstract Syntax Tree) from a sequence of input tokens and visit events produced by walking over a GFGParsing object. It uses the Builder GoF pattern: a complex object (say, a parse tree) is created from simpler objects (terminal and non-terminal nodes) using a step-by-step approach.

Constants

Terminal2NodeClass

Attributes

options[R]

Public Class Methods

new(theTokens) click to toggle source

Create a new AST builder instance. @param theTokens [Array<Token>] The sequence of input tokens.

Calls superclass method
# File lib/srl_ruby/ast_builder.rb, line 21
# Create a new AST builder instance; the list of collected flags starts empty.
# @param theTokens [Array<Token>] The sequence of input tokens,
#   handed over unchanged to the superclass constructor.
def initialize(theTokens)
  super
  @options = []
end

Public Instance Methods

done!() click to toggle source

Notification that the parse tree construction is complete.

Calls superclass method
# File lib/srl_ruby/ast_builder.rb, line 27
# Notification that the parse tree construction is complete.
# The collected flags are applied (see apply_options) before the
# notification is passed on to the superclass.
def done!
  apply_options
  super()
end

Protected Instance Methods

apply_options() click to toggle source
# File lib/srl_ruby/ast_builder.rb, line 38
# Apply the flags stored in `options` to the root of the resulting tree.
# Each :ALL_LAZY flag triggers a `lazy!` call on the root node. When other
# flags remain, the root is replaced by a Regex::MatchOption that wraps the
# original root together with those flags.
def apply_options
  root_node = result.root
  lazy_flags, other_flags = options.partition { |flag| flag == :ALL_LAZY }
  lazy_flags.each { root_node.lazy! }
  return if other_flags.empty?

  # The root is overwritten directly through its instance variable
  wrapped_root = Regex::MatchOption.new(root_node, other_flags)
  result.instance_variable_set(:@root, wrapped_root)
end
begin_anchor() click to toggle source
# File lib/srl_ruby/ast_builder.rb, line 117
# Factory method.
# @return [Regex::Anchor] an anchor created from the '^' text
def begin_anchor
  Regex::Anchor.new('^')
end
char_class(toNegate, *theChildren) click to toggle source
# File lib/srl_ruby/ast_builder.rb, line 101
# Factory method for a character class node.
# @param toNegate [Boolean] forwarded as first argument to Regex::CharClass.new
# @param theChildren [Array] the members of the class, forwarded in order
# @return [Regex::CharClass]
def char_class(toNegate, *theChildren)
  Regex::CharClass.new(toNegate, *theChildren)
end
char_range(lowerBound, upperBound) click to toggle source

rubocop: enable Style/OptionalBooleanParameter

# File lib/srl_ruby/ast_builder.rb, line 95
# Factory method for a character range node.
# Both bounds are first converted into Regex::Character objects.
# @param lowerBound [String] text of the first character of the range
# @param upperBound [String] text of the last character of the range
# @return [Regex::CharRange]
def char_range(lowerBound, upperBound)
  bounds = [lowerBound, upperBound].map { |bound| Regex::Character.new(bound) }
  Regex::CharRange.new(*bounds)
end
char_shorthand(shortName) click to toggle source
# File lib/srl_ruby/ast_builder.rb, line 105
# Factory method for a character shorthand node.
# @param shortName [String] the shorthand letter (callers in this class pass e.g. 'w', 'd', 's')
# @return [Regex::CharShorthand]
def char_shorthand(shortName)
  Regex::CharShorthand.new(shortName)
end
make_last_repetition_lazy(anExpr) click to toggle source

If the rightmost (sub)expression is a repetition, then make it lazy

# File lib/srl_ruby/ast_builder.rb, line 380
# If the rightmost (sub)expression is a repetition, then make it lazy.
# The expression tree is walked down along its rightmost branch: the last
# child of a polyadic expression, the sole child of a monadic expression.
# The walk stops at the first repetition found (the policy of its
# multiplicity is switched to :lazy) or at any node that cannot be
# descended into: atomic expression, nil, or a node of another kind.
# @param anExpr [Object] root of the (sub)expression to inspect
# @return [nil]
def make_last_repetition_lazy(anExpr)
  sub_expr = anExpr
  loop do
    case sub_expr
    when Regex::Repetition
      # Tested before the generic monadic case, as in the initial ordering
      sub_expr.multiplicity.instance_variable_set(:@policy, :lazy)
      break
    when Regex::PolyadicExpression
      sub_expr = sub_expr.children.last
    when Regex::MonadicExpression
      sub_expr = sub_expr.child
    else
      # Nothing to descend into (atomic node, nil from an empty children
      # list, unknown node kind). Without this fallback the loop never ends.
      break
    end
  end
end
multiplicity(lowerBound, upperBound) click to toggle source
# File lib/srl_ruby/ast_builder.rb, line 67
# Factory method for a repetition count; the policy is always :greedy.
# @param lowerBound [Integer] minimal number of occurrences
# @param upperBound [Integer, Symbol] maximal number of occurrences
#   (callers in this class pass an Integer or :more)
# @return [Regex::Multiplicity]
def multiplicity(lowerBound, upperBound)
  Regex::Multiplicity.new(lowerBound, upperBound, :greedy)
end
new_leaf_node(_production, _terminal, aTokenPosition, aToken) click to toggle source

Overriding method. Factory method for creating a node object for the given input token. @param _production [Rley::Syntax::Production] @param _terminal [Rley::Syntax::Terminal] Terminal symbol associated with the token @param aTokenPosition [Integer] Position of token in the input stream @param aToken [Rley::Lexical::Token] The input token

# File lib/srl_ruby/ast_builder.rb, line 61
# Overriding method.
# Factory method for creating a node object for the given input token.
# @param _production [Rley::Syntax::Production] (unused)
# @param _terminal [Rley::Syntax::Terminal] Terminal symbol associated with the token (unused)
# @param aTokenPosition [Integer] Position of token in the input stream
# @param aToken [Rley::Lexical::Token] The input token
# @return [Rley::PTree::TerminalNode]
def new_leaf_node(_production, _terminal, aTokenPosition, aToken)
  Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
end
reduce_all_lazy(_production, _range, _tokens, _children) click to toggle source

rule('single_flag' => %w[ALL LAZY]).tag 'all_lazy'

# File lib/srl_ruby/ast_builder.rb, line 160
# Semantic action for the 'all_lazy' rule (ALL LAZY flag).
# None of the arguments is used.
# @return [Symbol] the :ALL_LAZY marker, later recognized by apply_options
def reduce_all_lazy(_production, _range, _tokens, _children)
  :ALL_LAZY
end
reduce_alternative_list(_production, _range, _tokens, theChildren) click to toggle source

rule('alternatives' => %w[alternatives separator quantifiable]).tag 'alternative_list'

# File lib/srl_ruby/ast_builder.rb, line 365
# Semantic action for the 'alternative_list' rule.
# The last child is appended (in place) to the list held by the first child.
# @param theChildren [Array] values for the right-hand side symbols of the rule
# @return the first child, once extended
def reduce_alternative_list(_production, _range, _tokens, theChildren)
  alternatives = theChildren.first
  alternatives << theChildren.last
end
reduce_any_character(_production, _range, _tokens, _children) click to toggle source

rule('character_class' => %w[ANY CHARACTER]).tag 'any_character'

# File lib/srl_ruby/ast_builder.rb, line 239
# Semantic action for the 'any_character' rule (ANY CHARACTER).
# None of the arguments is used.
# @return the character shorthand built from 'w'
def reduce_any_character(_production, _range, _tokens, _children)
  char_shorthand('w')
end
reduce_any_lowercase(_production, _range, _tokens, _children) click to toggle source

rule('letter_range' => 'LETTER').tag 'any_lowercase'

# File lib/srl_ruby/ast_builder.rb, line 219
# Semantic action for the 'any_lowercase' rule (LETTER).
# None of the arguments is used.
# @return a non-negated character class holding the range 'a' to 'z'
def reduce_any_lowercase(_production, _range, _tokens, _children)
  char_class(false, char_range('a', 'z'))
end
reduce_any_of(_production, _range, _tokens, theChildren) click to toggle source

rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).tag 'any_of'

# File lib/srl_ruby/ast_builder.rb, line 350
# Semantic action for the 'any_of' rule (ANY OF LPAREN alternatives RPAREN).
# @param theChildren [Array] values for the right-hand side symbols of the rule;
#   the element at index 3 is the list of alternatives
# @return [Regex::Alternation]
def reduce_any_of(_production, _range, _tokens, theChildren)
  alternatives = theChildren[3]
  lone = alternatives.first

  # Ugly: in SRL, comma is a dummy separator except in any of construct...
  # Hence a single concatenation is split: each of its children becomes an alternative.
  members =
    if alternatives.size == 1 && lone.kind_of?(Regex::Concatenation)
      lone.children
    else
      alternatives
    end

  Regex::Alternation.new(*members)
end
reduce_any_uppercase(_production, _range, _tokens, _children) click to toggle source

rule('letter_range' => %w[UPPERCASE LETTER]).tag 'any_uppercase'

# File lib/srl_ruby/ast_builder.rb, line 225
# Semantic action for the 'any_uppercase' rule (UPPERCASE LETTER).
# None of the arguments is used.
# @return a non-negated character class holding the range 'A' to 'Z'
def reduce_any_uppercase(_production, _range, _tokens, _children)
  char_class(false, char_range('A', 'Z'))
end
reduce_anything(_production, _range, _tokens, _children) click to toggle source

rule('character_class' => 'ANYTHING').tag 'anything'

# File lib/srl_ruby/ast_builder.rb, line 269
# Semantic action for the 'anything' rule (ANYTHING).
# None of the arguments is used.
# @return the node created by the wildcard factory method
def reduce_anything(_production, _range, _tokens, _children)
  wildcard
end
reduce_assertable(_production, _range, _tokens, theChildren) click to toggle source

rule('assertable' => 'term quantifier?').tag 'assertable'

# File lib/srl_ruby/ast_builder.rb, line 197
# Semantic action for the 'assertable' rule (term quantifier?).
# @param theChildren [Array] the term followed by the optional quantifier
# @return the term itself when there is no quantifier,
#   otherwise the repetition of the term with that quantifier
def reduce_assertable(_production, _range, _tokens, theChildren)
  term, quantifier = theChildren
  return term unless quantifier

  repetition(term, quantifier)
end
reduce_assertion_sub_pattern(_production, aRange, theTokens, theChildren) click to toggle source

rule('sub_pattern' => 'assertion').tag 'assertion_sub_pattern'

# File lib/srl_ruby/ast_builder.rb, line 140
# Semantic action for the 'assertion_sub_pattern' rule (sub_pattern => assertion).
# Plain delegation to return_first_child with the range, tokens and children.
def reduce_assertion_sub_pattern(_production, aRange, theTokens, theChildren)
  return_first_child(aRange, theTokens, theChildren)
end
reduce_at_least(_production, _range, _tokens, theChildren) click to toggle source

rule('quantifier' => %w[AT LEAST count TIMES]).tag 'at_least'

# File lib/srl_ruby/ast_builder.rb, line 464
# Semantic action for the 'at_least' rule (AT LEAST count TIMES).
# @param theChildren [Array] the count node is expected at index 2;
#   its token lexeme is converted with to_i
# @return the multiplicity from that count up to :more
def reduce_at_least(_production, _range, _tokens, theChildren)
  minimum = theChildren[2].token.lexeme.to_i
  multiplicity(minimum, :more)
end
reduce_backslash(_production, _range, _tokens, _children) click to toggle source

rule('special_char' => 'BACKSLASH').tag 'backslash'

# File lib/srl_ruby/ast_builder.rb, line 309
# Semantic action for the 'backslash' rule (BACKSLASH).
# None of the arguments is used.
# @return the literal built by string_literal for one backslash, escaping requested
def reduce_backslash(_production, _range, _tokens, _children)
  # Double the backslash (because of escaping)
  string_literal('\\', true)
end
reduce_begin_with(_production, _range, _tokens, _children) click to toggle source

rule 'begin_anchor' => %w[BEGIN WITH]

# File lib/srl_ruby/ast_builder.rb, line 175
# Semantic action for the rule 'begin_anchor' => BEGIN WITH.
# None of the arguments is used.
# @return the node created by the begin_anchor factory method
def reduce_begin_with(_production, _range, _tokens, _children)
  begin_anchor
end
reduce_between_and(_production, _range, _tokens, theChildren) click to toggle source

rule('quantifier' => 'BETWEEN count AND count times_suffix').tag 'between_and'
# File lib/srl_ruby/ast_builder.rb, line 442
# Semantic action for the 'between_and' rule (BETWEEN count AND count times_suffix).
# @param theChildren [Array] the count nodes are expected at indices 1 and 3;
#   their token lexemes are converted with to_i
# @return the multiplicity with these two bounds, in the order they were given
def reduce_between_and(_production, _range, _tokens, theChildren)
  lower, upper = [1, 3].map { |idx| theChildren[idx].token.lexeme.to_i }
  multiplicity(lower, upper)
end
reduce_capture(_production, _range, _tokens, theChildren) click to toggle source

rule('capturing_group' => 'CAPTURE assertable (UNTIL assertable)?').tag 'capture'

# File lib/srl_ruby/ast_builder.rb, line 400
# Semantic action for the 'capture' rule (CAPTURE assertable (UNTIL assertable)?).
# @param theChildren [Array] index 1: the expression to capture;
#   index 2: the optional UNTIL part, a pair whose second element is the expression
# @return a capturing group, or the concatenation of the capturing group
#   and the UNTIL expression when the latter is present
def reduce_capture(_production, _range, _tokens, theChildren)
  captured = theChildren[1]
  until_clause = theChildren[2]
  return Regex::CapturingGroup.new(captured) unless until_clause

  # The UNTIL semantic requires the last pattern in the capture to be lazy
  make_last_repetition_lazy(captured)

  _until_keyword, until_expr = until_clause
  Regex::Concatenation.new(Regex::CapturingGroup.new(captured), until_expr)
end
reduce_carriage_return(_production, _range, _tokens, _children) click to toggle source

rule('special_char' => %w[CARRIAGE RETURN]).tag 'carriage_return'

# File lib/srl_ruby/ast_builder.rb, line 321
# Semantic action for the 'carriage_return' rule (CARRIAGE RETURN).
# None of the arguments is used.
# @return [Regex::Character] built from the two-character text: backslash, 'r'
#   (the literal is single-quoted, so Ruby does not interpret the escape)
def reduce_carriage_return(_production, _range, _tokens, _children)
  Regex::Character.new('\r')
end
reduce_case_insensitive(_production, _range, _tokens, _children) click to toggle source

rule('single_flag' => %w[CASE INSENSITIVE]).tag 'case_insensitive'

# File lib/srl_ruby/ast_builder.rb, line 150
# Semantic action for the 'case_insensitive' rule (CASE INSENSITIVE flag).
# None of the arguments is used.
# @return [Integer] the value of Regexp::IGNORECASE
def reduce_case_insensitive(_production, _range, _tokens, _children)
  Regexp::IGNORECASE
end
reduce_digit(_production, _range, _tokens, _children) click to toggle source

rule('character_class' => 'digit_or_number').tag 'digit'

# File lib/srl_ruby/ast_builder.rb, line 249
# Semantic action for the 'digit' rule (character_class => digit_or_number).
# None of the arguments is used.
# @return the character shorthand built from 'd'
def reduce_digit(_production, _range, _tokens, _children)
  char_shorthand('d')
end
reduce_digits_from_to(_production, _range, _tokens, theChildren) click to toggle source

rule('digit_range' => %w[digit_or_number FROM DIGIT_LIT TO DIGIT_LIT]).tag 'digits_from_to'

# File lib/srl_ruby/ast_builder.rb, line 231
# Semantic action for the 'digits_from_to' rule
# (digit_or_number FROM DIGIT_LIT TO DIGIT_LIT).
# The two bounds are converted to integers and put in ascending order,
# so the range is valid whichever order they were written in.
# @param theChildren [Array] the digit literal nodes are expected at indices 2 and 4
# @return a non-negated character class holding the digit range
def reduce_digits_from_to(_production, _range, _tokens, theChildren)
  bounds = [theChildren[2], theChildren[4]].map { |node| node.token.lexeme.to_i }
  low, high = bounds.sort.map(&:to_s)
  char_class(false, char_range(low, high))
end
reduce_end_anchor(_production, _range, _tokens, _children) click to toggle source

rule('end_anchor' => %w[separator MUST END]).tag 'end_anchor'

# File lib/srl_ruby/ast_builder.rb, line 180
# Semantic action for the 'end_anchor' rule (separator MUST END).
# None of the arguments is used.
# @return [Regex::Anchor] an anchor created from the '$' text
def reduce_end_anchor(_production, _range, _tokens, _children)
  Regex::Anchor.new('$')
end
reduce_exactly(_production, _range, _tokens, theChildren) click to toggle source

rule('quantifier' => %w[EXACTLY count TIMES]).tag 'exactly'

# File lib/srl_ruby/ast_builder.rb, line 435
# Semantic action for the 'exactly' rule (EXACTLY count TIMES).
# @param theChildren [Array] the count node is expected at index 1;
#   its token lexeme is converted with to_i
# @return the multiplicity whose lower and upper bounds both equal the count
def reduce_exactly(_production, _range, _tokens, theChildren)
  times = theChildren[1].token.lexeme.to_i
  multiplicity(times, times)
end
reduce_flag_sequence(_production, _range, _tokens, theChildren) click to toggle source

rule('flags' => '(separator single_flag)+').tag 'flag_sequence'

# File lib/srl_ruby/ast_builder.rb, line 145
# Semantic action for the 'flag_sequence' rule ((separator single_flag)+).
# @param theChildren [Array] its first element is the list of
#   (separator, flag) pairs
# @return [Array] the flags only, separators dropped, in the same order
def reduce_flag_sequence(_production, _range, _tokens, theChildren)
  theChildren[0].map { |_separator, flag| flag }
end
reduce_flagged_expr(_production, aRange, theTokens, theChildren) click to toggle source

rule('expression' => 'pattern (flags)?').tag 'flagged_expr'

# File lib/srl_ruby/ast_builder.rb, line 122
# Semantic action for the 'flagged_expr' rule (pattern (flags)?).
# When flags are present (second child), they are remembered in @options
# for later use. The result itself comes from return_first_child.
# @param theChildren [Array] the pattern followed by the optional flags
def reduce_flagged_expr(_production, aRange, theTokens, theChildren)
  flags = theChildren[1]
  @options = flags if flags
  return_first_child(aRange, theTokens, theChildren)
end
reduce_grouping_parenthenses(_production, _range, _tokens, theChildren) click to toggle source

rule('grouping' => %w[LPAREN pattern RPAREN]).tag 'grouping_parenthenses'

# File lib/srl_ruby/ast_builder.rb, line 375
# Semantic action for the 'grouping_parenthenses' rule (LPAREN pattern RPAREN).
# @param theChildren [Array] the pattern is the element at index 1
# @return [Regex::NonCapturingGroup] wrapping that pattern
def reduce_grouping_parenthenses(_production, _range, _tokens, theChildren)
  inner_pattern = theChildren[1]
  Regex::NonCapturingGroup.new(inner_pattern)
end
reduce_if_followed(_production, _range, _tokens, theChildren) click to toggle source

rule('assertion' => 'IF NOT? FOLLOWED BY assertable')

# File lib/srl_ruby/ast_builder.rb, line 185
# Semantic action for the rule: assertion => IF NOT? FOLLOWED BY assertable.
# @param theChildren [Array] index 1 holds the optional NOT (truthy when present);
#   the last element is the expression to assert
# @return [Regex::Lookaround] a lookahead, :negative when NOT is present, else :positive
def reduce_if_followed(_production, _range, _tokens, theChildren)
  negated = theChildren[1]
  Regex::Lookaround.new(theChildren.last, :ahead, negated ? :negative : :positive)
end
reduce_if_had(_production, _range, _tokens, theChildren) click to toggle source

rule('assertion' => 'IF NOT? ALREADY HAD assertable')

# File lib/srl_ruby/ast_builder.rb, line 191
# Semantic action for the rule: assertion => IF NOT? ALREADY HAD assertable.
# @param theChildren [Array] index 1 holds the optional NOT (truthy when present);
#   the last element is the expression to assert
# @return [Regex::Lookaround] a lookbehind, :negative when NOT is present, else :positive
def reduce_if_had(_production, _range, _tokens, theChildren)
  negated = theChildren[1]
  Regex::Lookaround.new(theChildren.last, :behind, negated ? :negative : :positive)
end
reduce_literally(_production, _range, _tokens, theChildren) click to toggle source

rule('literal' => %w[LITERALLY STRING_LIT]).tag 'literally'

# File lib/srl_ruby/ast_builder.rb, line 336
# Semantic action for the 'literally' rule (LITERALLY STRING_LIT).
# @param theChildren [Array] the last element is the string literal node;
#   a copy of its token lexeme is handed to string_literal
# @return the node(s) built by string_literal (with its default escaping)
def reduce_literally(_production, _range, _tokens, theChildren)
  # What if literal is empty?...
  string_literal(theChildren.last.token.lexeme.dup)
end
reduce_lowercase_from_to(_production, _range, _tokens, theChildren) click to toggle source

rule('letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]).tag 'lowercase_from_to'

# File lib/srl_ruby/ast_builder.rb, line 203
# Semantic action for the 'lowercase_from_to' rule
# (LETTER FROM LETTER_LIT TO LETTER_LIT).
# The two bounds are put in ascending order before the range is built.
# @param theChildren [Array] the letter literal nodes are expected at indices 2 and 4
# @return a non-negated character class holding the letter range
def reduce_lowercase_from_to(_production, _range, _tokens, theChildren)
  low, high = [theChildren[2], theChildren[4]].map { |node| node.token.lexeme }.sort
  char_class(false, char_range(low, high))
end
reduce_multi_line(_production, _range, _tokens, _children) click to toggle source

rule('single_flag' => %w[MULTI LINE]).tag 'multi_line'

# File lib/srl_ruby/ast_builder.rb, line 155
# Semantic action for the 'multi_line' rule (MULTI LINE flag).
# None of the arguments is used.
# @return [Integer] the value of Regexp::MULTILINE
def reduce_multi_line(_production, _range, _tokens, _children)
  Regexp::MULTILINE
end
reduce_named_capture(_production, _range, _tokens, theChildren) click to toggle source

rule('capturing_group' => 'CAPTURE assertable AS var_name (UNTIL assertable)?').tag 'named_capture'

# File lib/srl_ruby/ast_builder.rb, line 413
# Semantic action for the 'named_capture' rule
# (CAPTURE assertable AS var_name (UNTIL assertable)?).
# @param theChildren [Array] index 1: the expression to capture;
#   index 3: the name node (a copy of its token lexeme is used);
#   index 4: the optional UNTIL part, a pair whose second element is the expression
# @return a named capturing group, or the concatenation of that group
#   and the UNTIL expression when the latter is present
def reduce_named_capture(_production, _range, _tokens, theChildren)
  group_name = theChildren[3].token.lexeme.dup
  captured = theChildren[1]
  until_clause = theChildren[4]
  return Regex::CapturingGroup.new(captured, group_name) unless until_clause

  # The UNTIL semantic requires the last pattern in the capture to be lazy
  make_last_repetition_lazy(captured)

  _until_keyword, until_expr = until_clause
  Regex::Concatenation.new(Regex::CapturingGroup.new(captured, group_name), until_expr)
end
reduce_never_or_more(_production, _range, _tokens, _children) click to toggle source

rule('quantifier' => %w[NEVER OR MORE]).tag 'never_or_more'

# File lib/srl_ruby/ast_builder.rb, line 459
# Semantic action for the 'never_or_more' rule (NEVER OR MORE).
# None of the arguments is used.
# @return the multiplicity from 0 up to :more
def reduce_never_or_more(_production, _range, _tokens, _children)
  multiplicity(0, :more)
end
reduce_new_line(_production, _range, _tokens, _children) click to toggle source

rule('special_char' => %w[NEW LINE]).tag 'new_line'

# File lib/srl_ruby/ast_builder.rb, line 315
# Semantic action for the 'new_line' rule (NEW LINE).
# None of the arguments is used.
# @return [Regex::Character] built from the two-character text: backslash, 'n'
#   (the literal is single-quoted, so Ruby does not interpret the escape)
def reduce_new_line(_production, _range, _tokens, _children)
  # TODO: control portability
  Regex::Character.new('\n')
end
reduce_no_character(_production, _range, _tokens, _children) click to toggle source

rule('character_class' => %w[NO CHARACTER]).tag 'no_character'

# File lib/srl_ruby/ast_builder.rb, line 244
# Semantic action for the 'no_character' rule (NO CHARACTER).
# None of the arguments is used.
# @return the character shorthand built from 'W'
def reduce_no_character(_production, _range, _tokens, _children)
  char_shorthand('W')
end
reduce_no_whitespace(_production, _range, _tokens, _children) click to toggle source

rule('character_class' => %w[NO WHITESPACE]).tag 'no_whitespace'

# File lib/srl_ruby/ast_builder.rb, line 264
# Semantic action for the 'no_whitespace' rule (NO WHITESPACE).
# None of the arguments is used.
# @return the character shorthand built from 'S'
def reduce_no_whitespace(_production, _range, _tokens, _children)
  char_shorthand('S')
end
reduce_no_word(_production, _range, _tokens, _children) click to toggle source

rule('special_char' => %w[NO WORD]).tag 'no_word'

# File lib/srl_ruby/ast_builder.rb, line 331
# Semantic action for the rule: special_char => NO WORD.
# None of the arguments is used.
# @return [Regex::Anchor] built from the two-character text: backslash, 'B'
def reduce_no_word(_production, _range, _tokens, _children)
  Regex::Anchor.new('\B')
end
reduce_non_digit(_production, _range, _tokens, _children) click to toggle source

rule('character_class' => %w[NO DIGIT]).tag 'non_digit'

# File lib/srl_ruby/ast_builder.rb, line 254
# Semantic action for the 'non_digit' rule (NO DIGIT).
# None of the arguments is used.
# @return the character shorthand built from 'D'
def reduce_non_digit(_production, _range, _tokens, _children)
  char_shorthand('D')
end
reduce_none_of(_production, _range, _tokens, theChildren) click to toggle source

rule('character_class' => %w[NONE OF STRING_LIT]).tag 'none_of'

# File lib/srl_ruby/ast_builder.rb, line 290
# Semantic action for the 'none_of' rule (NONE OF STRING_LIT).
# Every character of the literal becomes a member of a negated character class.
# NOTE(review): unlike reduce_one_of, no backslash is put in front of the
#   characters that are special inside a character class - confirm this is intended.
# @param theChildren [Array] the last element is the string literal node
# @return [Regex::CharClass] a negated character class
def reduce_none_of(_production, _range, _tokens, theChildren)
  literal = theChildren.last.token.lexeme.dup
  members = literal.each_char.map { |ch| Regex::Character.new(ch) }
  Regex::CharClass.new(true, *members)
end
reduce_once(_production, _range, _tokens, _children) click to toggle source

rule('quantifier' => 'ONCE').tag 'once'

# File lib/srl_ruby/ast_builder.rb, line 425
# Semantic action for the 'once' rule (ONCE).
# None of the arguments is used.
# @return the multiplicity with both bounds equal to 1
def reduce_once(_production, _range, _tokens, _children)
  multiplicity(1, 1)
end
reduce_once_or_more(_production, _range, _tokens, _children) click to toggle source

rule('quantifier' => %w[ONCE OR MORE]).tag 'once_or_more'

# File lib/srl_ruby/ast_builder.rb, line 454
# Semantic action for the 'once_or_more' rule (ONCE OR MORE).
# None of the arguments is used.
# @return the multiplicity from 1 up to :more
def reduce_once_or_more(_production, _range, _tokens, _children)
  multiplicity(1, :more)
end
reduce_one_of(_production, _range, _tokens, theChildren) click to toggle source

rule('character_class' => %w[ONE OF STRING_LIT]).tag 'one_of'

# File lib/srl_ruby/ast_builder.rb, line 274
# Semantic action for the 'one_of' rule (ONE OF STRING_LIT).
# Every character of the literal becomes a member of a character class.
# A character listed in Regex::Character::MetaCharsInClass is replaced by
# the concatenation of a backslash and that character.
# @param theChildren [Array] the last element is the string literal node
# @return [Regex::CharClass] a non-negated character class
def reduce_one_of(_production, _range, _tokens, theChildren)
  literal = theChildren.last.token.lexeme.dup
  members = literal.each_char.map do |ch|
    next Regex::Character.new(ch) unless Regex::Character::MetaCharsInClass.include?(ch)

    escaped = ['\\', ch].map { |part| Regex::Character.new(part) }
    Regex::Concatenation.new(*escaped)
  end

  # TODO check other implementations
  Regex::CharClass.new(false, *members)
end
reduce_optional(_production, _range, _tokens, _children) click to toggle source

rule('quantifier' => 'OPTIONAL').tag 'optional'

# File lib/srl_ruby/ast_builder.rb, line 449
# Semantic action for the 'optional' rule (OPTIONAL).
# None of the arguments is used.
# @return the multiplicity from 0 up to 1
def reduce_optional(_production, _range, _tokens, _children)
  multiplicity(0, 1)
end
reduce_pattern_sequence(_production, _range, _tokens, theChildren) click to toggle source

rule('pattern' => 'sub_pattern (separator sub_pattern)*').tag 'pattern_sequence'

# File lib/srl_ruby/ast_builder.rb, line 128
# Semantic action for the 'pattern_sequence' rule.
# A lone sub-pattern is returned as is. Otherwise all sub-patterns are
# concatenated. When the first successor is a lookbehind (a Regex::Lookaround
# whose dir is :behind), it is moved in front of the leading sub-pattern.
# @param theChildren [Array] index 0: the leading sub-pattern;
#   index 1: list of (separator, sub-pattern) pairs, possibly empty
# @return the leading sub-pattern or a Regex::Concatenation
def reduce_pattern_sequence(_production, _range, _tokens, theChildren)
  head = theChildren[0]
  return head if theChildren[1].empty?

  followers = theChildren[1].map { |sep_and_pattern| sep_and_pattern[1] }
  first_follower = followers[0]
  lookbehind_first = first_follower.kind_of?(Regex::Lookaround) && first_follower.dir == :behind
  return Regex::Concatenation.new(head, *followers) unless lookbehind_first

  leading = followers.shift
  Regex::Concatenation.new(leading, head, *followers)
end
reduce_quantifiable(_production, _range, _tokens, theChildren) click to toggle source

rule('quantifiable' => 'begin_anchor? anchorable end_anchor?')

# File lib/srl_ruby/ast_builder.rb, line 165
# Semantic action for the rule: quantifiable => begin_anchor? anchorable end_anchor?.
# @param theChildren [Array] values for the right-hand side symbols;
#   absent optional parts show up as nil and are left out
# @return [Regex::Concatenation] of the parts that are present
def reduce_quantifiable(_production, _range, _tokens, theChildren)
  present_parts = theChildren.compact
  Regex::Concatenation.new(*present_parts)
end
reduce_raw_literal(_production, _range, _tokens, theChildren) click to toggle source

rule('raw' => %w[RAW STRING_LIT]).tag 'raw_literal'

# File lib/srl_ruby/ast_builder.rb, line 344
# Semantic action for the 'raw_literal' rule (RAW STRING_LIT).
# @param theChildren [Array] the last element is the string literal node;
#   a copy of its token lexeme is used
# @return [Regex::RawExpression]
def reduce_raw_literal(_production, _range, _tokens, theChildren)
  Regex::RawExpression.new(theChildren.last.token.lexeme.dup)
end
reduce_simple_alternative(_production, _range, _tokens, theChildren) click to toggle source

rule('alternatives' => 'quantifiable').tag 'simple_alternative'

# File lib/srl_ruby/ast_builder.rb, line 370
# Semantic action for the 'simple_alternative' rule (alternatives => quantifiable).
# @param theChildren [Array] values for the right-hand side symbols of the rule
# @return [Array] a new one-element list holding the last child
def reduce_simple_alternative(_production, _range, _tokens, theChildren)
  [theChildren.last]
end
reduce_starts_with(_production, _range, _tokens, _children) click to toggle source

rule 'begin_anchor' => %w[STARTS WITH]

# File lib/srl_ruby/ast_builder.rb, line 170
# Semantic action for the rule 'begin_anchor' => STARTS WITH.
# None of the arguments is used.
# @return the node created by the begin_anchor factory method
def reduce_starts_with(_production, _range, _tokens, _children)
  begin_anchor
end
reduce_tab(_production, _range, _tokens, _children) click to toggle source

rule('special_char' => 'TAB').tag 'tab'

# File lib/srl_ruby/ast_builder.rb, line 299
# Semantic action for the 'tab' rule (TAB).
# None of the arguments is used.
# @return [Regex::Character] built from the two-character text: backslash, 't'
#   (the literal is single-quoted, so Ruby does not interpret the escape)
def reduce_tab(_production, _range, _tokens, _children)
  Regex::Character.new('\t')
end
reduce_twice(_production, _range, _tokens, _children) click to toggle source

rule('quantifier' => 'TWICE').tag 'twice'

# File lib/srl_ruby/ast_builder.rb, line 430
# Semantic action for the 'twice' rule (TWICE).
# None of the arguments is used.
# @return the multiplicity with both bounds equal to 2
def reduce_twice(_production, _range, _tokens, _children)
  multiplicity(2, 2)
end
reduce_uppercase_from_to(_production, _range, _tokens, theChildren) click to toggle source

rule('letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]).tag 'uppercase_from_to'

# File lib/srl_ruby/ast_builder.rb, line 211
# Semantic action for the 'uppercase_from_to' rule
# (UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT).
# The two bounds are put in ascending order before the range is built.
# @param theChildren [Array] the letter literal nodes are expected at indices 3 and 5
# @return a non-negated character class holding the letter range
def reduce_uppercase_from_to(_production, _range, _tokens, theChildren)
  low, high = [theChildren[3], theChildren[5]].map { |node| node.token.lexeme }.sort
  char_class(false, char_range(low, high))
end
reduce_vtab(_production, _range, _tokens, _children) click to toggle source

rule('special_char' => %w[VERTICAL TAB]).tag 'vtab'

# File lib/srl_ruby/ast_builder.rb, line 304
# Semantic action for the 'vtab' rule (VERTICAL TAB).
# None of the arguments is used.
# @return [Regex::Character] built from the two-character text: backslash, 'v'
#   (the literal is single-quoted, so Ruby does not interpret the escape)
def reduce_vtab(_production, _range, _tokens, _children)
  Regex::Character.new('\v')
end
reduce_whitespace(_production, _range, _tokens, _children) click to toggle source

rule('character_class' => 'WHITESPACE').tag 'whitespace'

# File lib/srl_ruby/ast_builder.rb, line 259
# Semantic action for the 'whitespace' rule (WHITESPACE).
# None of the arguments is used.
# @return the character shorthand built from 's'
def reduce_whitespace(_production, _range, _tokens, _children)
  char_shorthand('s')
end
reduce_word(_production, _range, _tokens, _children) click to toggle source

rule('special_char' => %w[WORD]).tag 'word'

# File lib/srl_ruby/ast_builder.rb, line 326
# Semantic action for the 'word' rule.
# None of the arguments is used.
# @return [Regex::Anchor] built from the two-character text: backslash, 'b'
def reduce_word(_production, _range, _tokens, _children)
  Regex::Anchor.new('\b')
end
repetition(expressionToRepeat, aMultiplicity) click to toggle source
# File lib/srl_ruby/ast_builder.rb, line 113
# Factory method for a repetition node.
# @param expressionToRepeat the expression subject to the repetition
# @param aMultiplicity the number of repetitions (see the multiplicity factory method)
# @return [Regex::Repetition]
def repetition(expressionToRepeat, aMultiplicity)
  Regex::Repetition.new(expressionToRepeat, aMultiplicity)
end
string_literal(aString, to_escape = true) click to toggle source

rubocop: disable Style/OptionalBooleanParameter

# File lib/srl_ruby/ast_builder.rb, line 73
# Build the node(s) that stand for a literal text.
# Unless to_escape is falsy, a backslash character node is put in front
# of every character found in Regex::Character::MetaChars.
# @param aString [String] the literal text
# @param to_escape [Boolean] whether meta-characters get a backslash prefix
# @return a Regex::Concatenation for a text of several characters or for a
#   single escaped character; a lone Regex::Character otherwise
#   (this includes the empty text)
def string_literal(aString, to_escape = true)
  must_escape = ->(text) { to_escape && Regex::Character::MetaChars.include?(text) }

  if aString.size > 1
    nodes = aString.each_char.flat_map do |ch|
      prefix = must_escape.call(ch) ? [Regex::Character.new('\\')] : []
      prefix << Regex::Character.new(ch)
    end
    return Regex::Concatenation.new(*nodes)
  end

  return Regex::Character.new(aString) unless must_escape.call(aString)

  Regex::Concatenation.new(Regex::Character.new('\\'), Regex::Character.new(aString))
end
terminal2node() click to toggle source
# File lib/srl_ruby/ast_builder.rb, line 34
# Accessor method.
# @return the value of the Terminal2NodeClass constant
def terminal2node
  Terminal2NodeClass
end
wildcard() click to toggle source
# File lib/srl_ruby/ast_builder.rb, line 109
# Factory method for a wildcard node.
# @return [Regex::Wildcard] a new instance, created without argument
def wildcard
  Regex::Wildcard.new
end