class Tomereader::Index
Public Class Methods
new()
click to toggle source
include Settings
# File lib/tomereader/index.rb, line 6
# Builds an empty index. Takes no arguments; text is supplied afterwards
# through #add.
def initialize
  # Every phrase object appended by #split, in insertion order.
  @phrase_storage = []
  # Every word string yielded while splitting, duplicates included.
  @total_words = []
  # Word string => stored word entry (filled by #create).
  @word_storage = {}
  # A letter followed by any run of letters, apostrophes or hyphens.
  @word_pattern = /[A-Za-z]([A-Za-z\'\-])*/
  # Phrases are delimited by a full stop or a semicolon.
  @phrase_split_pattern = /[\.\;]/
end
Public Instance Methods
add(content)
click to toggle source
# File lib/tomereader/index.rb, line 16
# Adds a piece of text to the index.
#
# content - the text to index; must be a non-empty String.
#
# Raises ArgumentError when content is not a String.
# Raises StandardError when content is empty.
# Returns self, so calls can be chained.
def add(content)
  raise ArgumentError, "Content must be a String - #{content.class} given instead" unless content.is_a?(String)
  raise StandardError, "Content is empty" if content.empty?

  # #split breaks the content into phrases itself, so the text is not
  # pre-split here (the result of doing so was never used).
  split(content)
  self
end
check(word_string)
click to toggle source
# File lib/tomereader/index.rb, line 72
# Decides whether word_string may be stored as a word.
#
# Returns false for anything that is not a String; otherwise returns
# whatever #suitable? returns for it.
def check(word_string)
  return false unless word_string.is_a?(String)

  suitable?(word_string)
end
create(word_string)
click to toggle source
# File lib/tomereader/index.rb, line 67
# Stores a new Word for word_string, keyed by the string itself.
#
# Returns the new Word, or nil when #check rejects word_string (nothing is
# stored in that case). An existing entry under the same key is overwritten.
def create(word_string)
  return unless check(word_string)

  @word_storage[word_string] = Word.new(word_string)
end
find(word_string)
click to toggle source
# File lib/tomereader/index.rb, line 62
# Looks up the entry stored under word_string.
#
# Returns the stored entry, or nil when the key is absent.
def find(word_string)
  @word_storage.fetch(word_string, nil)
end
find_or_create(word_string)
click to toggle source
# File lib/tomereader/index.rb, line 75
# Returns the entry already stored for word_string, creating one through
# #create when #find comes back empty. The result is nil when neither
# succeeds.
def find_or_create(word_string)
  existing = find(word_string)
  return existing if existing

  create(word_string)
end
phrases()
click to toggle source
# File lib/tomereader/index.rb, line 29
# Reader for the phrase list. Hands back the internal Array itself (not a
# copy), so mutations by the caller affect the index.
def phrases
  @phrase_storage
end
split(content)
click to toggle source
Splits the text into phrases, extracts the words, and establishes the links in both directions: phrase -> words and word -> phrases.
# File lib/tomereader/index.rb, line 47
# Splits content into phrases, records every word, and links each stored
# word to the phrase it occurred in.
#
# For each phrase string a Phrase is built and appended to @phrase_storage.
# Every word string the phrase yields is appended to @total_words, whether
# or not it ends up stored as a Word.
#
# NOTE(review): Phrase#split is presumed to yield (word_string, position)
# pairs, as the block parameters suggest - confirm against Phrase.
#
# Returns the Array built by map; each element is the result of appending
# to @phrase_storage, i.e. @phrase_storage itself.
# NOTE(review): #add ignores this value - confirm no other caller uses it.
def split(content)
  split_into_phrases(content).map do |raw_phrase|
    current = Phrase.new(raw_phrase)
    current.split do |token, index|
      @total_words.push(token)
      entry = find_or_create(token)
      # find_or_create yields nil for unsuitable tokens; only real words
      # get linked back to the phrase.
      next unless entry.is_a?(Word)

      entry.add(current, index)
    end
    @phrase_storage.push(current)
  end
end
split_into_phrases(content)
click to toggle source
# File lib/tomereader/index.rb, line 23
# Cuts content into phrase strings at every match of @phrase_split_pattern.
#
# Returns an Array of Strings. Surrounding whitespace is left on the pieces.
def split_into_phrases(content)
  content.split(@phrase_split_pattern)
end
suitable?(word_string)
click to toggle source
word word_storage
# File lib/tomereader/index.rb, line 59
# Tells whether word_string contains a match for @word_pattern.
#
# The pattern is applied as given; nothing here anchors it, so a match
# anywhere inside the string counts.
#
# Returns true or false (false for nil). Uses Regexp#match?, which leaves
# the last-match globals untouched.
def suitable?(word_string)
  @word_pattern.match?(word_string)
end
to_s()
click to toggle source
# File lib/tomereader/index.rb, line 42
# Summarises the index as a String with three counters:
#   total        - number of word strings recorded in @total_words
#   unique_count - number of entries in @word_storage
#   phrases      - number of phrases in @phrase_storage
#
# Always returns a String. Ruby discards a non-String to_s result in puts
# and string interpolation, so a Hash must not be returned here.
def to_s
  "{total: #{@total_words.count}, " \
    "unique_count: #{@word_storage.count}, " \
    "phrases: #{@phrase_storage.count}}"
end
words()
click to toggle source
# File lib/tomereader/index.rb, line 26
# Reader for the word table. Hands back the internal Hash itself (word
# string => stored entry), not a copy.
def words
  @word_storage
end
words_sorted_by_alphabet(count=nil)
click to toggle source
# File lib/tomereader/index.rb, line 32
# Returns a new Hash of the indexed words ordered by their key.
#
# count - optional number of leading entries to keep; nil (the default)
#         keeps them all.
def words_sorted_by_alphabet(count=nil)
  ordered = words.sort_by { |text, _entry| text }
  ordered = ordered.first(count) unless count.nil?
  ordered.to_h
end
words_sorted_by_frequency(count=nil)
click to toggle source
# File lib/tomereader/index.rb, line 37
# Returns a new Hash of the indexed words ordered by each entry's
# #frequency, lowest first.
#
# count - optional number of leading entries to keep; nil (the default)
#         keeps them all.
#
# NOTE(review): because the order is ascending, passing count yields the
# least frequent words - confirm that is what callers expect.
def words_sorted_by_frequency(count=nil)
  ordered = words.sort_by { |_text, entry| entry.frequency }
  ordered = ordered.first(count) unless count.nil?
  ordered.to_h
end