class RubyTokenizer::Tokenizer
Attributes
text[R]
Public Class Methods
new(text)
click to toggle source
# File lib/ruby_tokenizer.rb, line 10
# Creates a tokenizer for the given input and keeps it in @text.
#
# @param text [String] the raw text to analyse (presumably a String:
#   `filter` calls `downcase` on it); stored unmodified
def initialize(text)
  @text = text
end
Public Instance Methods
rank()
click to toggle source
# File lib/ruby_tokenizer.rb, line 14
# Lists the most frequent tokens, highest count first.
#
# @return [Array<Array>] at most ten [token, count] pairs taken from
#   `frequency`, ordered by descending count
def rank
  ascending = frequency.sort_by { |_token, occurrences| occurrences }
  # The tail of the ascending list holds the largest counts; reversing it
  # puts the most frequent token first.
  ascending.last(10).reverse
end
Protected Instance Methods
filter()
click to toggle source
# File lib/ruby_tokenizer.rb, line 21
# Normalises the text before it is split into tokens: lowercases `text`,
# then substitutes a single space for every match of `Patterns.basic`.
#
# NOTE(review): `Patterns.basic` is defined outside this section; what it
# matches is not visible here.
#
# @return [String] the lowercased, space-substituted text
def filter
  lowercased = text.downcase
  lowercased.gsub(Patterns.basic, ' ')
end
frequency()
click to toggle source
# File lib/ruby_tokenizer.rb, line 29
# Counts how many times each token produced by `tokenize` occurs.
#
# @return [Hash] token => occurrence count; the hash is created with a
#   default of 0, so looking up an unseen token yields 0
def frequency
  tokenize.each_with_object(Hash.new(0)) do |token, tally|
    tally[token] += 1
  end
end
tokenize()
click to toggle source
# File lib/ruby_tokenizer.rb, line 25
# Splits the filtered text into tokens.
#
# A token is a run of word characters, hyphens, apostrophes (straight or
# typographic), dots and at-signs, so forms such as "e-mail", "don't" and
# "me@example.com" stay in one piece.
#
# @return [Array<String>] the tokens in order of appearance, never empty
#   strings
def tokenize
  raw_tokens = filter.scan(/[-\w'’.@]+/)

  # Strip the entire run of trailing '.', '_' or '-' (sentence punctuation,
  # ellipses, dashes), not just the final character, so "wait..." yields
  # "wait" rather than "wait..".
  trimmed = raw_tokens.map { |token| token.sub(/[._-]+\z/, '') }

  # A token made only of such punctuation (e.g. a free-standing "-") is
  # reduced to "" and must not be reported as a word.
  trimmed.reject(&:empty?)
end