class TwitterCldr::Utils::RegexpSampler
Generates a valid string that would match the given regexp AST.
Constants
- DIGITS
- WORD_LETTERS
Attributes
regexp_ast[R]
Public Class Methods
new(regexp_ast)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 17
# Builds a sampler around an already-parsed regexp tree.
#
# regexp_ast - root node of the regexp AST; kept in @regexp_ast so that
#              #generate can walk it later.
def initialize(regexp_ast)
  @regexp_ast = regexp_ast
end
Public Instance Methods
generate()
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 21
# Produces one sample string by walking every child of the root AST node
# and joining the pieces each child yields.
def generate
  root = regexp_ast
  walk_children(root)
end
Private Instance Methods
class_name_for(node)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 145
# Derives the snake_case name used to look up a walk_* handler for a node.
#
# node - any object; only its class name is inspected.
#
# Returns the last "::"-separated segment of the class name, lower-cased,
# with an underscore inserted before every capital letter except one in the
# very first position (e.g. CharacterSet -> "character_set").
def class_name_for(node)
  base_name = node.class.to_s.split("::").last
  # (?!\A) excludes position 0, so a leading capital gets no "_" prefix.
  base_name.gsub(/(?!\A)[A-Z]/) { |capital| "_#{capital}" }.downcase
end
expand_charset(members)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 112
# Flattens a list of character-set members into one array of candidate
# characters by expanding each member with expand_charset_member and
# concatenating the results in order.
#
# members - enumerable of member strings taken from a character-set node.
#
# Returns an Array (empty when members is empty).
def expand_charset(members)
  members.flat_map { |member| expand_charset_member(member) }
end
expand_charset_member(member)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 118
# Expands one character-set member into the characters it stands for.
#
# member - a String such as "a" or "a-z".
#
# Every non-backslash character (optionally followed by "-") is collected;
# the first two collected characters are treated as range endpoints. With a
# single collected character the result is a one-element array.
# NOTE(review): backslashes are simply skipped, so a member like "\\d"
# yields ["d"] - confirm this is acceptable for the patterns being sampled.
def expand_charset_member(member)
  endpoints = member.scan(/([^\\])-?/).flatten
  first_char, last_char = endpoints
  return [first_char] unless last_char

  (first_char..last_char).to_a
end
quantifier_sample(arr, quantifier)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 123
# Draws a quantifier-determined number of random elements from arr.
#
# arr        - collection to sample from (via single_sample).
# quantifier - object responding to #min and #max.
#
# When min and max are equal exactly that many elements are drawn;
# otherwise the count comes from rand_in_quantifier.
# Returns an Array of the drawn elements.
def quantifier_sample(arr, quantifier)
  fixed_count = quantifier.min == quantifier.max
  count = fixed_count ? quantifier.min : rand_in_quantifier(quantifier)
  count.times.map { single_sample(arr) }
end
rand_in_quantifier(quantifier)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 137
# Chooses a random repeat count for a quantifier by handing its #min and
# #max to rand_in_range.
#
# quantifier - object responding to #min and #max.
def rand_in_quantifier(quantifier)
  lower = quantifier.min
  upper = quantifier.max
  rand_in_range(lower, upper)
end
rand_in_range(min, max)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 141
# Picks a random Integer between min and max, both inclusive.
#
# min - lower bound (Integer).
# max - upper bound (Integer).
#
# Returns an Integer. When the span (max - min) + 1 is zero, min itself is
# returned; other spans are passed to Kernel#rand unchanged.
def rand_in_range(min, max)
  span = (max - min) + 1
  # Kernel#rand(0) yields a Float in [0, 1) rather than an Integer, which
  # would break callers that use the result as a repeat count (`n.times`).
  # NOTE(review): presumably reachable when a quantifier reports max = -1
  # for "unbounded" together with min = 0 - confirm against the parser.
  return min if span.zero?

  min + rand(span)
end
single_sample(arr)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 133
# Returns one element of arr chosen at a random index in 0...arr.size.
#
# arr - indexable collection responding to #size and #[].
def single_sample(arr)
  position = rand(arr.size)
  arr[position]
end
walk(node)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 27
# Dispatches a node to its walk_<class_name> handler.
#
# node - AST node; the handler name is built from class_name_for(node).
#
# Returns the handler's result, or "" when this object has no such
# handler (private methods are included in the lookup).
def walk(node)
  handler = :"walk_#{class_name_for(node)}"
  return "" unless respond_to?(handler, true)

  send(handler, node)
end
walk_alternation(node)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 88
# Samples an alternation node by walking one randomly chosen entry of
# node.expressions. When the node is quantified, a fresh random entry is
# chosen for each repetition and the results are joined.
def walk_alternation(node)
  pick_branch = lambda { walk(single_sample(node.expressions)) }
  return pick_branch.call unless node.quantified?

  repetitions = rand_in_quantifier(node.quantifier)
  repetitions.times.map { pick_branch.call }.join
end
walk_alternative(node)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 98
# Samples a single alternative by walking all of its child expressions
# (delegates directly to walk_children).
def walk_alternative(node)
  walk_children(node)
end
walk_capture(node)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 70
# Samples a group node. Unquantified groups are walked once; quantified
# groups are walked a random number of times (per rand_in_quantifier) and
# the outputs joined into one string.
def walk_capture(node)
  return walk_children(node) unless node.quantified?

  repetitions = rand_in_quantifier(node.quantifier)
  repetitions.times.map { walk_children(node) }.join
end
walk_character_set(node)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 60
# Samples a character-set node: expands node.members into candidate
# characters, draws one (or a quantifier-determined number) of them, and
# appends whatever the node's children produce.
def walk_character_set(node)
  candidates = expand_charset(node.members)
  picked =
    if node.quantified?
      quantifier_sample(candidates, node.quantifier)
    else
      [single_sample(candidates)]
    end
  picked.join + walk_children(node)
end
walk_children(node)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 32
# Walks each entry of node.expressions in order and joins the resulting
# fragments into a single string.
def walk_children(node)
  fragments = node.expressions.map { |child| walk(child) }
  fragments.join
end
walk_digit(node)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 36
# Samples a digit node: draws one entry from DIGITS (or a
# quantifier-determined number of entries), joins them, and appends the
# output of the node's children.
def walk_digit(node)
  picked =
    if node.quantified?
      quantifier_sample(DIGITS, node.quantifier)
    else
      [single_sample(DIGITS)]
    end
  picked.join + walk_children(node)
end
walk_literal(node)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 52
# Samples a literal node: repeats node.text once, or a random number of
# times when the node is quantified, then appends the output of the node's
# children.
def walk_literal(node)
  text = node.text
  repeat = node.quantified? ? rand_in_quantifier(node.quantifier) : 1
  (text * repeat) + walk_children(node)
end
walk_passive(node)
click to toggle source
“Passive” means a non-capturing group. Since we don't need to distinguish between capturing and non-capturing groups, we can just delegate to the walk_capture
method.
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 84
# Samples a passive (non-capturing) group. Sampling does not depend on
# whether a group captures, so the node is handled exactly like a capture.
def walk_passive(node)
  walk_capture(node)
end
walk_sequence(node)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 102
# Samples a sequence node by walking each of node.expressions in order and
# joining the results. A quantified sequence is rendered a random number
# of times (per rand_in_quantifier), each rendering walked afresh.
def walk_sequence(node)
  render_once = lambda { node.expressions.map { |child| walk(child) }.join }
  return render_once.call unless node.quantified?

  repetitions = rand_in_quantifier(node.quantifier)
  repetitions.times.map { render_once.call }.join
end
walk_word(node)
click to toggle source
# File lib/twitter_cldr/utils/regexp_sampler.rb, line 44
# Samples a word-character node: draws one entry from WORD_LETTERS (or a
# quantifier-determined number of entries), joins them, and appends the
# output of the node's children.
def walk_word(node)
  picked =
    if node.quantified?
      quantifier_sample(WORD_LETTERS, node.quantifier)
    else
      [single_sample(WORD_LETTERS)]
    end
  picked.join + walk_children(node)
end