class ChainPunk::Corpus
Attributes
frequency_table[R]
seeds[R]
Public Class Methods
new(text, options = {})
click to toggle source
# File lib/chain_punk/corpus.rb, line 7
# Builds a corpus and immediately trains it on the supplied text.
#
# @param text [String] raw text handed to #train
# @param options [Hash] forwarded untouched to #train, which reads the
#   :exclusions, :closures, :boundaries and :index_size keys
def initialize(text, options = {})
  train(text, options)
end
Public Instance Methods
train(text, options = {})
click to toggle source
# File lib/chain_punk/corpus.rb, line 11
# Runs the text through the processing pipeline and stores the outcome in
# @frequency_table and @seeds, replacing whatever they held before.
#
# @param text [String] raw training text
# @param options [Hash] pipeline settings
# @option options [Array, nil] :exclusions passed to remove_exclusions
# @option options [Array, nil] :closures passed to process_sets
# @option options [Array, nil] :boundaries passed to process_phrases
# @option options [Integer, nil] :index_size passed to process_graphemes
# @return [Array] the two-element result of process_graphemes
def train(text, options = {})
  cleaned = remove_exclusions(text, options[:exclusions])
  phrases = process_phrases(process_sets(cleaned, options[:closures]), options[:boundaries])
  table_and_seeds = process_graphemes(phrases, options[:index_size])

  @frequency_table, @seeds = table_and_seeds
end
Private Instance Methods
process_graphemes(grapheme_phrases, index_size = 1, frequency_table = {}, seeds = [])
click to toggle source
# File lib/chain_punk/corpus.rb, line 52
# Builds the follower table and the list of seeds from pre-split phrases.
#
# For every phrase, the first +index_size+ elements are recorded as a seed.
# A window of +index_size+ elements then slides along the phrase one element
# at a time; each window becomes a key whose value list receives the (up to)
# +index_size+ elements that immediately follow it. Phrases no longer than
# +index_size+ contribute a seed but no table entries.
#
# The phrases passed in are left untouched.
#
# @param grapheme_phrases [Array<Array>] phrases already split into elements
# @param index_size [Integer, nil] window width; nil falls back to 1
# @param frequency_table [Hash] table to add entries to
# @param seeds [Array] list to append seeds to
# @return [Array] two-element array of [frequency_table, seeds]
def process_graphemes(grapheme_phrases, index_size = 1, frequency_table = {}, seeds = [])
  # Callers may pass an explicit nil (e.g. a missing options key).
  index_size ||= 1

  grapheme_phrases.each do |phrase|
    seeds << phrase[0, index_size]

    # Walk the window by offset rather than shifting elements off the
    # phrase, so the caller's arrays are not consumed.
    (0...(phrase.size - index_size)).each do |offset|
      key = phrase[offset, index_size]
      (frequency_table[key] ||= []) << phrase[offset + index_size, index_size]
    end
  end

  [frequency_table, seeds]
end
process_phrases(phrases, boundaries = nil)
click to toggle source
# File lib/chain_punk/corpus.rb, line 36
# Splits every phrase into its elements via split_phrase.
#
# The +phrases+ array is only read; it is not emptied or otherwise modified.
#
# @param phrases [Array] phrases to split, in order
# @param boundaries [Array, nil] forwarded to split_phrase for each phrase
# @return [Array<Array>] one array of elements per input phrase, same order
def process_phrases(phrases, boundaries = nil)
  phrases.map { |phrase| split_phrase(phrase, boundaries) }
end
process_sets(text, closures = nil)
click to toggle source
# File lib/chain_punk/corpus.rb, line 30
# Breaks the text into phrases at every closure.
#
# @param text [String] text to break up
# @param closures [Array, nil] patterns combined with Regexp.union; when nil
#   the text is returned as a single phrase
# @return [Array<String>] the phrases; empty pieces are dropped whenever
#   closures are given
def process_sets(text, closures = nil)
  if closures.nil?
    [text]
  else
    separator = Regexp.union(closures)
    pieces = text.split(separator)
    pieces.reject(&:empty?)
  end
end
remove_exclusions(text, exclusions = nil)
click to toggle source
# File lib/chain_punk/corpus.rb, line 20
# Deletes every occurrence of each exclusion from the text.
#
# Exclusions are applied in order, each one to the result of the previous
# removal. The original string is never modified.
#
# @param text [String] text to clean
# @param exclusions [Array, String, Regexp, nil] pattern(s) to delete; a
#   single pattern is treated as a one-element list, nil means no exclusions
# @return [String] the text with all exclusions removed (the text itself
#   when there is nothing to exclude)
def remove_exclusions(text, exclusions = nil)
  # Array() maps nil to [] and wraps a lone pattern, so one code path
  # covers every accepted input shape.
  Array(exclusions).reduce(text) do |remaining, exclusion|
    remaining.gsub(exclusion, '')
  end
end
split_phrase(phrase, boundaries = nil)
click to toggle source
# File lib/chain_punk/corpus.rb, line 46
# Splits one phrase into its elements.
#
# The phrase is converted with #to_s first, so nil and Symbol phrases are
# handled the same way whether or not boundaries are given.
#
# @param phrase [#to_s] phrase to split
# @param boundaries [Array, nil] patterns combined with Regexp.union; when
#   nil the phrase is split into individual characters
# @return [Array<String>] characters (no boundaries) or the non-empty pieces
#   between boundaries
def split_phrase(phrase, boundaries = nil)
  text = phrase.to_s
  return text.chars if boundaries.nil?

  text.split(Regexp.union(boundaries)).reject(&:empty?)
end