class Tomereader::Index

Public Class Methods

new() click to toggle source

include Settings

# File lib/tomereader/index.rb, line 6
# Builds an empty index.
#
# Prepares the two patterns used while indexing (one recognising a word, one
# marking phrase boundaries) and the three empty stores: every word string
# seen, the unique-word lookup, and the list of phrases.
def initialize
  @word_pattern = /[A-Za-z]([A-Za-z\'\-])*/
  @phrase_split_pattern = /[\.\;]/
  @total_words = []
  @word_storage = {}
  @phrase_storage = []
end

Public Instance Methods

add(content) click to toggle source
# File lib/tomereader/index.rb, line 16
# Indexes +content+ and returns the index itself so calls can be chained.
#
# The content is validated here and then handed to #split, which performs the
# phrase splitting on its own; no separate pre-split is needed.
#
# content - the text to index; must be a non-empty String.
#
# Raises ArgumentError when +content+ is not a String.
# Raises StandardError when +content+ is an empty String.
# Returns self.
def add(content)
  raise ArgumentError, "Content must be a String - #{content.class} given instead" unless content.kind_of? String
  raise StandardError, "Content is empty" if content.empty?

  split(content)
  self
end
check(word_string) click to toggle source
# File lib/tomereader/index.rb, line 72
# Tells whether +word_string+ may be stored in the index.
#
# Returns false for anything that is not a String; otherwise returns whatever
# #suitable? returns for the string.
def check(word_string)
  return false unless word_string.kind_of?(String)

  suitable?(word_string)
end
create(word_string) click to toggle source
# File lib/tomereader/index.rb, line 67
# Registers a new Word for +word_string+ in the word store, replacing any
# entry already kept under that key.
#
# Returns the newly built Word, or nil when #check rejects +word_string+.
def create(word_string)
  return unless check(word_string)

  @word_storage[word_string] = Word.new(word_string)
end
find(word_string) click to toggle source
# File lib/tomereader/index.rb, line 62
# Looks up the entry stored under +word_string+.
#
# Returns the stored value, or nil when the key is absent.
def find(word_string)
  @word_storage.fetch(word_string, nil)
end
find_or_create(word_string) click to toggle source
# File lib/tomereader/index.rb, line 75
# Returns the entry already stored for +word_string+; when #find yields
# nothing, falls back to #create and returns its result (which may be nil).
def find_or_create(word_string)
  existing = find(word_string)
  return existing if existing

  create(word_string)
end
phrases() click to toggle source
# File lib/tomereader/index.rb, line 29
# Returns the phrase store: the collection that #split appends each Phrase to.
# The stored object itself is returned, not a copy.
def phrases
  @phrase_storage
end
split(content) click to toggle source

Розбиває текст на фрази, витягує слова та встановлює зв'язки: фраза -> слова і слово -> фрази.

# File lib/tomereader/index.rb, line 47
  # Breaks +content+ into phrases, extracts the words of every phrase and links
  # them both ways: phrase -> words and word -> phrases.
  #
  # Every word string yielded by Phrase#split is appended to @total_words, even
  # when no Word is found or created for it; only Word results are linked to
  # the phrase through Word#add. Each phrase is then pushed onto
  # @phrase_storage.
  #
  # content - the text to index.
  #
  # Returns an Array with one element per phrase, each element being the result
  # of `@phrase_storage << phrase` (the storage itself when it is an Array).
  def split(content)
    split_into_phrases(content).map do |fragment|
      phrase = Phrase.new(fragment)
      phrase.split do |token, offset|
        @total_words << token
        entry = find_or_create(token)
        if entry.is_a?(Word)
          entry.add(phrase, offset)
        end
      end
      @phrase_storage << phrase
    end
  end
split_into_phrases(content) click to toggle source
# File lib/tomereader/index.rb, line 23
# Cuts +content+ into phrase strings at every match of the phrase split
# pattern held in @phrase_split_pattern.
#
# Returns the Array produced by String#split.
def split_into_phrases(content)
  content.split(@phrase_split_pattern)
end
suitable?(word_string) click to toggle source

word word_storage

# File lib/tomereader/index.rb, line 59
# Matches +word_string+ against the word pattern held in @word_pattern.
#
# Returns the index of the first match, or nil when nothing matches.
# NOTE(review): the pattern assigned in #initialize is unanchored, so a string
# that merely contains an ASCII letter (e.g. "12ab") matches - confirm that
# this is intended.
def suitable?(word_string)
  word_string =~ @word_pattern
end
to_s() click to toggle source
# File lib/tomereader/index.rb, line 42
def to_s
  {total: @total_words.count, unique_count: @word_storage.count, phrases: @phrase_storage.count}
end
words() click to toggle source
# File lib/tomereader/index.rb, line 26
# Returns the word store: the Hash in which #create keeps one Word per word
# string. The stored object itself is returned, not a copy.
def words
  @word_storage
end
words_sorted_by_alphabet(count=nil) click to toggle source
# File lib/tomereader/index.rb, line 32
# Returns the indexed words as a new Hash ordered by word string, ascending.
#
# count - optional limit; when given, only the first +count+ entries of the
#         ordered list are kept.
def words_sorted_by_alphabet(count=nil)
  ordered = words.sort_by { |key, _value| key }
  ordered = ordered.first(count) unless count.nil?
  ordered.to_h
end
words_sorted_by_frequency(count=nil) click to toggle source
# File lib/tomereader/index.rb, line 37
# Returns the indexed words as a new Hash ordered by each entry's +frequency+,
# ascending.
#
# count - optional limit; when given, only the first +count+ entries of the
#         ordered list are kept.
#
# NOTE(review): because the order is ascending, a limit keeps the least
# frequent words - confirm that callers expect this.
def words_sorted_by_frequency(count=nil)
  ordered = words.sort_by { |_key, value| value.frequency }
  ordered = ordered.first(count) unless count.nil?
  ordered.to_h
end