class Markov::Parser

Public Class Methods

new()
# File lib/markov/parser.rb, line 4
# Sets up an empty parser: two empty buffers plus the regular expressions
# used to cut text into sentences and sentences into words.
def initialize
  # Queue of word arrays not yet tokenised, and the buffer of ready tokens.
  @unparsed_sentences, @tokens = [], []

  # Characters removed from a sentence before it is split into words.
  @replace_chars = /[„':;_"()]/

  # Sentence boundary: whitespace that directly follows '.', '?' or '!'.
  @split_sentence = /(?<=[.?!])\s+/

  # Word boundary: whitespace is dropped, while ',', '.', '?' and '!' are
  # captured so that String#split keeps them as separate elements.
  @split_words = /([,.?!])|[\s]/
end

Public Instance Methods

load_text(source)
# File lib/markov/parser.rb, line 19
# Reads the text file at +source+, splits its content into sentences with
# @split_sentence and queues every sentence via add_unparsed_sentence.
#
# @param source [String] path of the file to read
# @return [Array<String>] the sentences as split from the file
# @raise [FileNotFoundError] if +source+ does not exist
def load_text(source)
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  raise FileNotFoundError, "#{source} does not exist!" unless File.exist?(source)

  # File.read opens, reads and closes the file in one call, so no handle is
  # left open. The content is then labelled as UTF-8 as before.
  text = File.read(source).force_encoding(Encoding::UTF_8)

  text.split(@split_sentence).each do |sentence|
    add_unparsed_sentence(sentence)
  end
end
next_token()
# File lib/markov/parser.rb, line 33
# Returns the next token of the loaded text, or nil once every queued
# sentence has been consumed.
#
# When the token buffer is empty, the next queued sentence (an array of
# words) is converted into Markov::Token objects:
#   ","            -> :special
#   "?", "!", "."  -> :stop
#   ""             -> skipped
#   anything else  -> :word
#
# @return [Markov::Token, nil] the next token, or nil when input is exhausted
def next_token
  # Keep refilling until a token is available. A sentence that produces no
  # tokens (only blank strings) must not be mistaken for the end of input.
  while @tokens.empty?
    sentence = @unparsed_sentences.shift
    return nil unless sentence

    sentence.each do |word|
      next if word.empty? # skip blanks

      @tokens <<
        if word.include?(",")
          Markov::Token.new(",", :special)
        elsif word.include?("?")
          Markov::Token.new("?", :stop)
        elsif word.include?("!")
          Markov::Token.new("!", :stop)
        elsif word.include?(".")
          Markov::Token.new(".", :stop)
        else
          Markov::Token.new(word, :word)
        end
    end
  end

  @tokens.shift
end

Private Instance Methods

add_unparsed_sentence(sentence)
# File lib/markov/parser.rb, line 67
# Strips the characters matched by @replace_chars from +sentence+, splits
# the result into words with @split_words and appends the word array to
# @unparsed_sentences. Sentences that yield no words are ignored.
#
# @param sentence [String] a single raw sentence; it is not modified
# @return [Array, nil] @unparsed_sentences after appending, or nil when the
#   sentence produced no words
def add_unparsed_sentence(sentence)
  # Non-destructive gsub: leaves the caller's string untouched and also
  # works when the string is frozen.
  words = sentence.gsub(@replace_chars, "").split(@split_words)

  @unparsed_sentences << words unless words.empty?
end