module Grammars
Constants
- VERSION
Public Class Methods
add(original, pos)
click to toggle source
# File lib/grammars.rb, line 13
# Registers a new rule.
#
# Builds a Rule from +original+ and +pos+ and appends it to the collection
# returned by +rules+. The return value is whatever that append returns.
def self.add(original, pos)
  rules.push(Rule.new(original, pos))
end
clear()
click to toggle source
# File lib/grammars.rb, line 17
# Forgets every registered rule by pointing @rules at a brand-new empty array.
# Returns that empty array.
def self.clear
  @rules = Array.new
end
find_examples(input_file, output_file, grammar, limit = 1234567890)
click to toggle source
# File lib/grammars.rb, line 67
# Copies example lines with a given grammar from one file to another.
#
# Every line of +input_file+ is run through +parse+; lines whose result equals
# +grammar+ are written unchanged to +output_file+ (which is created or
# truncated), stopping once +limit+ lines have been written.
#
# input_file::  path of the file to scan, one candidate per line
# output_file:: path of the file that receives the matching lines
# grammar::     value compared (with ==) against +parse(line)+
# limit::       maximum number of lines to write; zero or less writes nothing
#
# Returns nil.
def self.find_examples(input_file, output_file, grammar, limit = 1234567890)
  File.open(output_file, 'w') do |f|
    count = 0
    # Stream the input line by line instead of loading the whole file.
    File.foreach(input_file) do |line|
      # Check the cap before doing any work, so that a limit of zero (or a
      # negative one) never lets a first matching line slip through.
      break if count >= limit
      next unless parse(line) == grammar

      f.puts(line)
      count += 1
    end
    nil
  end
end
freq(array)
click to toggle source
# File lib/grammars.rb, line 59
# Counts how many times each element occurs in +array+.
#
# Returns an array of [element, count] pairs, ordered from the highest count
# to the lowest.
def self.freq(array)
  counts = array.each_with_object(Hash.new(0)) { |item, seen| seen[item] += 1 }
  ascending = counts.sort_by { |_item, count| count }
  ascending.reverse
end
generate_frequencies(input_file, output_file)
click to toggle source
# File lib/grammars.rb, line 49
# Writes a frequency table of the grammars found in +input_file+.
#
# Each line of +input_file+ is passed to +parse+, the results are handed to
# +freq+, and whatever +freq+ returns is written to +output_file+ with
# IO#puts (which prints nested array elements one per line).
#
# input_file::  path of the file to read, one entry per line
# output_file:: path of the report file; created or truncated
#
# Returns nil.
def self.generate_frequencies(input_file, output_file)
  # Block form guarantees the handle is closed even when reading, parsing or
  # counting raises part-way through.
  File.open(output_file, 'w') do |output|
    grammars = File.readlines(input_file).map { |line| parse(line) }
    output.puts(freq(grammars))
  end
end
identify_grammar(tagged_text)
click to toggle source
# File lib/grammars.rb, line 29
# Reduces tagged text to a grammar string.
#
# +tagged_text+ is a collection whose elements respond to +values+ (the
# single-pair hashes built by +tag+ fit). The first value of each element is
# taken and the results are joined with single spaces.
def self.identify_grammar(tagged_text)
  tagged_text.map(&:values).map(&:first).join(' ')
end
parse(text)
click to toggle source
# File lib/grammars.rb, line 25
# Tags +text+ with +tag+ and returns what +identify_grammar+ produces for the
# tagged result.
def self.parse(text)
  tagged = tag(text)
  identify_grammar(tagged)
end
register(&block)
click to toggle source
# File lib/grammars.rb, line 21
# Evaluates +block+ in the context of the receiver, so calls made inside the
# block without an explicit receiver are sent to it.
def self.register(&block)
  # module_eval is the Module-level alias of class_eval.
  module_eval(&block)
end
rules()
click to toggle source
# File lib/grammars.rb, line 9
# Returns the list stored in @rules, creating it as an empty array the first
# time it is needed (or whenever @rules is nil or false).
def self.rules
  return @rules if defined?(@rules) && @rules

  @rules = []
end
tag(raw, rules = self.rules)
click to toggle source
# File lib/grammars.rb, line 33
# Splits +raw+ into tagged segments using an ordered list of rules.
#
# raw::   the text to tag
# rules:: objects responding to +matcher+ (passed to String#match) and
#         +token+; defaults to the registered rules
#
# The first rule is applied to the whole text. When it matches, the text
# before and after the match is tagged recursively with the same rule list
# and the matched text is mapped to the rule's token. When it does not
# match, the remaining rules are tried. Text that no rule claims is returned
# stripped and mapped to itself.
#
# Returns an array of single-pair hashes in text order; empty for empty input.
def self.tag(raw, rules = self.rules)
  return [] if raw.empty?
  return [{ raw.strip => raw.strip }] if rules.empty?

  rule = rules.first
  matches = raw.match(rule.matcher)

  # No usable match: move on to the next rule. A zero-length match is handled
  # the same way because it consumes no text, so recursing on it cannot make
  # progress.
  return tag(raw, rules[1..-1]) if matches.nil? || matches[0].empty?

  # Cut the text at the position the matcher actually matched. Splitting on
  # the matched string instead would cut at its first textual occurrence,
  # which is wrong for context-sensitive matchers such as /\bat\b/ on "cat at".
  tag(matches.pre_match.strip, rules) +
    [{ matches[0] => rule.token }] +
    tag(matches.post_match.strip, rules)
end