class TwitterCldr::Utils::RegexpSampler

Generates a random string that matches the given regexp AST.

Constants

DIGITS
WORD_LETTERS

Attributes

regexp_ast[R]

Public Class Methods

new(regexp_ast) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 17
# Builds a sampler around an already-parsed regexp.
#
# regexp_ast - root node of the regexp tree to sample from. It is stored
#              as-is and exposed through the regexp_ast reader; generate
#              later reads its #expressions.
def initialize(regexp_ast)
  @regexp_ast = regexp_ast
end

Public Instance Methods

generate() click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 21
# Produces one sample string for the stored regexp AST by walking each of
# the root node's child expressions and concatenating the results.
#
# Returns a String. Output is random wherever the regexp allows a choice.
def generate
  walk_children(regexp_ast)
end

Private Instance Methods

class_name_for(node) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 145
# Derives the snake_case name used to look up a node's walk_* handler.
#
# The namespace is dropped, then every capital letter except the first
# character is prefixed with an underscore and the result is lowercased,
# e.g. Foo::CharacterSet => "character_set".
#
# node - any object; only its class name is inspected.
#
# Returns a String.
def class_name_for(node)
  short_name = node.class.to_s.split("::").last

  # The empty \A alternative matches first at index 0; gsub then steps past
  # the leading character, so it never receives an underscore.
  underscored = short_name.gsub(/\A|([A-Z])/) do
    capital = Regexp.last_match(1)
    capital ? "_#{capital.downcase}" : ""
  end

  underscored.downcase
end
expand_charset(members) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 112
# Expands every member of a character set into the individual characters
# it stands for and returns them as one flat list.
#
# members - an Enumerable of set members; each one is handed to
#           expand_charset_member, which returns an Array.
#
# Returns an Array containing the concatenation of all expansions, in
# member order (empty when there are no members).
def expand_charset(members)
  # flat_map appends each expansion in place instead of allocating a new
  # accumulator array for every member.
  members.flat_map { |member| expand_charset_member(member) }
end
expand_charset_member(member) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 118
# Expands a single character-set member into the characters it covers.
#
# The member is scanned for characters that are not a backslash, each
# optionally followed by a hyphen. The first two characters found are used
# as the endpoints of a range; if only one is found it is returned alone.
#
# member - a String; presumably either a single character ("x") or a
#          hyphenated range ("a-z") — confirm against the AST library.
#
# Returns an Array of Strings.
def expand_charset_member(member)
  endpoints = member.scan(/([^\\])-?/).flatten
  first_char, last_char = endpoints

  return [first_char] unless last_char

  (first_char..last_char).to_a
end
quantifier_sample(arr, quantifier) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 123
# Draws as many random elements from arr as the quantifier calls for.
#
# arr        - the pool of candidates to pick from (picks may repeat).
# quantifier - object responding to #min and #max. When both are equal
#              that exact count is used; otherwise the count comes from
#              rand_in_quantifier.
#
# Returns an Array of the picked elements.
def quantifier_sample(arr, quantifier)
  exact_count = quantifier.min == quantifier.max
  pick_count = exact_count ? quantifier.min : rand_in_quantifier(quantifier)

  pick_count.times.map { single_sample(arr) }
end
rand_in_quantifier(quantifier) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 137
# Picks a random repetition count allowed by the given quantifier.
#
# quantifier - object responding to #min and #max; both bounds are passed
#              straight to rand_in_range.
#
# Returns whatever rand_in_range returns for those bounds.
def rand_in_quantifier(quantifier)
  lower_bound = quantifier.min
  upper_bound = quantifier.max

  rand_in_range(lower_bound, upper_bound)
end
rand_in_range(min, max) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 141
# Returns a random Integer between min and max, both inclusive.
#
# min - Integer lower bound.
# max - Integer upper bound.
#
# When the span (max - min) + 1 is zero, min is returned. Kernel#rand(0)
# yields a Float in [0, 1) rather than an Integer, which would otherwise
# leak a fractional count to callers that invoke Integer#times on it.
# NOTE(review): a zero span would occur if an open-ended quantifier reports
# min 0 and max -1 — confirm against the AST library's quantifier.
def rand_in_range(min, max)
  span = (max - min) + 1
  return min if span.zero?

  min + rand(span)
end
single_sample(arr) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 133
# Picks one element of arr at a random index.
#
# arr - an Array (or anything responding to #size and #[]).
#
# Returns the element found at the chosen index.
def single_sample(arr)
  index = rand(arr.size)
  arr[index]
end
walk(node) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 27
# Dispatches a node to the walk_<type> handler named after its class (see
# class_name_for), including private handlers.
#
# node - the AST node to sample.
#
# Returns the handler's result, or "" when no handler exists for the node
# type, so unsupported nodes contribute nothing to the output.
def walk(node)
  handler = :"walk_#{class_name_for(node)}"
  return "" unless respond_to?(handler, true)

  send(handler, node)
end
walk_alternation(node) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 88
# Samples an alternation by walking one randomly chosen branch.
#
# When the node is quantified, a repetition count is drawn from its
# quantifier and a branch is chosen independently for each repetition; the
# pieces are concatenated.
#
# node - responds to #expressions (the branches), #quantified? and
#        #quantifier.
#
# Returns a String.
def walk_alternation(node)
  return walk(single_sample(node.expressions)) unless node.quantified?

  repetitions = rand_in_quantifier(node.quantifier)
  pieces = repetitions.times.map { walk(single_sample(node.expressions)) }
  pieces.join
end
walk_alternative(node) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 98
# Samples an alternative node by walking each of its child expressions and
# concatenating the results (see walk_children).
#
# node - responds to #expressions.
#
# Returns a String.
def walk_alternative(node)
  walk_children(node)
end
walk_capture(node) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 70
# Samples a group by walking its children.
#
# When the group is quantified, a repetition count is drawn from its
# quantifier and the children are walked afresh for every repetition, so
# each repetition may produce different text.
#
# node - responds to #expressions, #quantified? and #quantifier.
#
# Returns a String.
def walk_capture(node)
  return walk_children(node) unless node.quantified?

  repetitions = rand_in_quantifier(node.quantifier)
  repetitions.times.map { walk_children(node) }.join
end
walk_character_set(node) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 60
# Samples a character set such as [a-z0-9].
#
# The set's members are expanded into a flat list of candidates. One
# candidate is picked, or as many as the quantifier calls for when the node
# is quantified. The output of the node's child expressions is appended.
#
# node - responds to #members, #quantified?, #quantifier and #expressions.
#
# Returns a String.
def walk_character_set(node)
  candidates = expand_charset(node.members)

  picks =
    if node.quantified?
      quantifier_sample(candidates, node.quantifier)
    else
      [single_sample(candidates)]
    end

  picks.join + walk_children(node)
end
walk_children(node) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 32
# Walks every child expression of node, in order, and concatenates what
# each one produces.
#
# node - responds to #expressions.
#
# Returns a String ("" when there are no children).
def walk_children(node)
  fragments = node.expressions.map { |child| walk(child) }
  fragments.join
end
walk_digit(node) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 36
# Samples a digit node by picking from DIGITS: one entry, or as many as the
# quantifier calls for when the node is quantified. The output of the
# node's child expressions is appended.
#
# node - responds to #quantified?, #quantifier and #expressions.
#
# Returns a String.
def walk_digit(node)
  picks =
    if node.quantified?
      quantifier_sample(DIGITS, node.quantifier)
    else
      [single_sample(DIGITS)]
    end

  picks.join + walk_children(node)
end
walk_literal(node) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 52
# Samples a literal node: its text, repeated a random number of times
# permitted by the quantifier when the node is quantified (once otherwise),
# followed by the output of the node's child expressions.
#
# node - responds to #text, #quantified?, #quantifier and #expressions.
#
# Returns a String.
def walk_literal(node)
  literal_text = node.text
  repeat_count = node.quantified? ? rand_in_quantifier(node.quantifier) : 1

  (literal_text * repeat_count) + walk_children(node)
end
walk_passive(node) click to toggle source

“Passive” means a non-capturing group. Since we don't need to distinguish between capturing and non-capturing groups, we can just delegate to the walk_capture method.

# File lib/twitter_cldr/utils/regexp_sampler.rb, line 84
# Samples a passive (non-capturing) group. Sampling treats it exactly like
# a capturing group, so the work is handed to walk_capture.
#
# node - passed through unchanged to walk_capture.
#
# Returns whatever walk_capture returns for the node.
def walk_passive(node)
  walk_capture(node)
end
walk_sequence(node) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 102
# Samples a sequence by walking its expressions in order and concatenating
# the results.
#
# When the sequence is quantified, a repetition count is drawn from its
# quantifier and the expressions are walked afresh for each repetition.
#
# node - responds to #expressions, #quantified? and #quantifier.
#
# Returns a String.
def walk_sequence(node)
  render_once = -> { node.expressions.map { |child| walk(child) }.join }
  return render_once.call unless node.quantified?

  rand_in_quantifier(node.quantifier).times.map { render_once.call }.join
end
walk_word(node) click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 44
# Samples a word-character node by picking from WORD_LETTERS: one entry,
# or as many as the quantifier calls for when the node is quantified. The
# output of the node's child expressions is appended.
#
# node - responds to #quantified?, #quantifier and #expressions.
#
# Returns a String.
def walk_word(node)
  letters = node.quantified? ? quantifier_sample(WORD_LETTERS, node.quantifier) : [single_sample(WORD_LETTERS)]

  letters.join + walk_children(node)
end