class RubyTokenizer::Tokenizer

Attributes

text[R]

Public Class Methods

new(text) click to toggle source
# File lib/ruby_tokenizer.rb, line 10
# Builds a tokenizer around the given text.
#
# The value is stored as-is in @text and exposed through the read-only
# +text+ attribute. It is not validated or copied here; #filter later calls
# +downcase+ on it, so it must respond to that method.
def initialize(text)
  @text = text
end

Public Instance Methods

rank() click to toggle source
# File lib/ruby_tokenizer.rb, line 14
# Returns the most frequent tokens as [token, count] pairs, highest count
# first.
#
# The result is an Array of two-element arrays, not a Hash. The earlier
# implementation built an intermediate Hash from the sorted pairs and then
# called +first(10)+ on it, which yields exactly these pairs, so the
# round-trip through Hash has been removed.
#
# Ordering among tokens with equal counts is whatever sort_by + reverse
# produces; sort_by is not guaranteed to be stable, so do not rely on it.
#
# @param limit [Integer] maximum number of pairs to return; defaults to 10,
#   which was previously hard-coded
# @return [Array<Array>] up to +limit+ [token, count] pairs
# @raise [ArgumentError] if +limit+ is negative (raised by Array#first)
def rank(limit = 10)
  frequency.sort_by { |_word, count| count }.reverse.first(limit)
end

Protected Instance Methods

filter() click to toggle source
# File lib/ruby_tokenizer.rb, line 21
# Lower-cases the text and blanks out everything matched by Patterns.basic.
#
# Each match is replaced with a single space (not removed), so adjacent
# replacements leave runs of spaces in the returned string.
#
# @return [String] a new string; the original text is not modified
def filter
  lowered = text.downcase
  lowered.gsub(Patterns.basic, ' ')
end
frequency() click to toggle source
# File lib/ruby_tokenizer.rb, line 29
# Counts how many times each token produced by #tokenize occurs.
#
# The returned Hash is created with a default of 0, so looking up a token
# that never occurred yields 0 rather than nil. Keys appear in order of
# first occurrence.
#
# @return [Hash] token => occurrence count
def frequency
  tokenize.each_with_object(Hash.new(0)) do |word, tally|
    tally[word] += 1
  end
end
tokenize() click to toggle source
# File lib/ruby_tokenizer.rb, line 25
# Splits the filtered text into tokens.
#
# A token is a maximal run of word characters, hyphens, apostrophes
# (straight or typographic), dots and at-signs, so things like "it's",
# "well-known" and "a@b.c" stay in one piece.
#
# Trailing dots, underscores and hyphens are then stripped. Two fixes over
# the previous version:
# * the whole trailing run is removed ("world..." => "world"), not just the
#   last character;
# * tokens that consist only of such characters (a free-standing "-" or
#   "...") are dropped instead of being returned as empty strings, which
#   would otherwise be counted as a word.
#
# @return [Array<String>] tokens in order of appearance, never empty strings
def tokenize
  filter
    .scan(/[-\w'’.@]+/)
    .map { |token| token.sub(/[._-]+\z/, '') } # \z: anchor at the true end of the token
    .reject(&:empty?)
end