module Grammars

Constants

VERSION

Public Class Methods

add(original, pos) click to toggle source
# File lib/grammars.rb, line 13
# Wraps +original+ and +pos+ in a new Rule and appends it to the rule list.
#
# @param original passed through as the first argument of Rule.new
# @param pos passed through as the second argument of Rule.new
# @return [Array] the rule list, including the rule that was just appended
def self.add(original, pos)
  registry = rules
  registry.push(Rule.new(original, pos))
end
clear() click to toggle source
# File lib/grammars.rb, line 17
# Forgets every registered rule by pointing @rules at a brand-new empty array.
# The previous array object is left untouched, so existing references keep
# their contents.
#
# @return [Array] the new, empty rule list
def self.clear
  @rules = Array.new
end
find_examples(input_file, output_file, grammar, limit = 1234567890) click to toggle source
# File lib/grammars.rb, line 67
# Copies each line of +input_file+ whose parsed grammar equals +grammar+ into
# +output_file+, stopping once +limit+ lines have been written.
#
# +output_file+ is opened in 'w' mode, so any previous content is discarded
# even when nothing matches.
#
# @param input_file [String] path of the file to scan line by line
# @param output_file [String] path of the file that receives matching lines
# @param grammar value compared (with ==) against parse(line)
# @param limit [Integer] maximum number of lines to write; zero or a
#   negative value writes nothing
# @return [nil]
def self.find_examples(input_file, output_file, grammar, limit = 1234567890)
  File.open(output_file, 'w') do |out|
    written = 0
    # Stream the input rather than loading the whole file into memory.
    File.foreach(input_file) do |line|
      # Test the limit *before* writing so that a limit of 0 yields no output.
      break if written >= limit
      next unless parse(line) == grammar

      out.puts(line)
      written += 1
    end
  end
  nil
end
freq(array) click to toggle source
# File lib/grammars.rb, line 59
# Counts how many times each element occurs in +array+.
#
# @param array [Array] the elements to count
# @return [Array<Array>] [element, count] pairs ordered from the highest
#   count to the lowest
def self.freq(array)
  counts = array.each_with_object(Hash.new(0)) do |item, seen|
    seen[item] += 1
  end
  ascending = counts.sort_by { |_item, occurrences| occurrences }
  ascending.reverse
end
generate_frequencies(input_file, output_file) click to toggle source
# File lib/grammars.rb, line 49
# Parses every line of +input_file+, counts how often each parse result
# occurs, and writes the counts to +output_file+.
#
# The result of freq is handed straight to IO#puts, which prints nested
# arrays one element per line; each entry therefore appears as the parse
# result on one line followed by its count on the next.
#
# @param input_file [String] path of the file whose lines are parsed
# @param output_file [String] path of the file to write (opened in 'w' mode)
# @return [nil]
def self.generate_frequencies(input_file, output_file)
  # Block form guarantees the handle is closed even if parsing raises.
  File.open(output_file, 'w') do |out|
    grammars = File.foreach(input_file).map { |line| parse(line) }
    out.puts(freq(grammars))
  end
  nil
end
identify_grammar(tagged_text) click to toggle source
# File lib/grammars.rb, line 29
# Reduces tagged text to a grammar string: takes the first value of every
# entry in +tagged_text+ and joins those values with single spaces.
#
# @param tagged_text [Array<Hash>] entries such as those returned by tag
# @return [String] the space-separated values
def self.identify_grammar(tagged_text)
  tokens = tagged_text.map { |entry| entry.values[0] }
  tokens.join(' ')
end
parse(text) click to toggle source
# File lib/grammars.rb, line 25
# Tags +text+ using the default rule list and condenses the tagged result
# into its grammar string.
#
# @param text [String] the raw text to analyse
# @return whatever identify_grammar returns for the tagged text
def self.parse(text)
  tagged = tag(text)
  identify_grammar(tagged)
end
register(&block) click to toggle source
# File lib/grammars.rb, line 21
# Evaluates +block+ in the context of this module, so calls made inside the
# block (for example to add) are sent to the module itself.
#
# @yield the block to evaluate with the module as self
# @return the value of the block
def self.register(&block)
  module_eval(&block)
end
rules() click to toggle source
# File lib/grammars.rb, line 9
# Returns the rule list, creating an empty one the first time it is needed
# (or whenever @rules is currently nil/false).
#
# @return [Array] the stored rule list
def self.rules
  @rules = [] unless @rules
  @rules
end
tag(raw, rules = self.rules) click to toggle source
# File lib/grammars.rb, line 33
# Splits +raw+ into tagged fragments by applying +rules+ in order.
#
# The first rule's matcher is tried against +raw+. On a match, the matched
# text is tagged with that rule's token, and the text before and after the
# match is tagged recursively with the same rule list. Without a match the
# remaining rules are tried. Text that no rule matches is tagged with itself.
#
# @param raw [String] the text to tag
# @param rules [Array] objects responding to +matcher+ and +token+;
#   defaults to the registered rules
# @return [Array<Hash>] one single-pair hash per fragment, in text order,
#   mapping the fragment to its token
def self.tag(raw, rules = self.rules)
  return [] if raw.empty?
  return [{ raw.strip => raw.strip }] if rules.empty?

  rule = rules.first
  matches = raw.match(rule.matcher)

  # No usable match: move on to the next rule. A zero-length match consumes
  # no text and would recurse forever, so it is treated like a miss.
  return tag(raw, rules.drop(1)) if matches.nil? || matches[0].empty?

  # Use the match's own boundaries. Splitting on the matched *text* could cut
  # at an earlier literal occurrence that the pattern itself did not match.
  tag(matches.pre_match.strip, rules) +
    [{ matches[0] => rule.token }] +
    tag(matches.post_match.strip, rules)
end