class String

Public Instance Methods

paragraph_summary( count=1, separator=" [...] " ) click to toggle source
# File lib/reclassifier/core_ext/string.rb, line 6
# Summarizes the receiver at paragraph granularity.
#
# The string is broken up with #split_paragraphs and the pieces are handed
# to #perform_lsi, which picks +count+ of them and joins the picks with
# +separator+.
#
# count     - how many paragraphs to request from #perform_lsi (default: 1).
# separator - text placed between the selected paragraphs
#             (default: " [...] ").
#
# Returns whatever #perform_lsi returns (a joined String).
def paragraph_summary(count = 1, separator = " [...] ")
   paragraphs = split_paragraphs
   perform_lsi(paragraphs, count, separator)
end
split_paragraphs() click to toggle source
# File lib/reclassifier/core_ext/string.rb, line 14
# Splits the receiver wherever a blank line occurs (two consecutive "\n",
# two consecutive "\r", or two consecutive "\r\n" pairs).
#
# The pattern wraps the separator in a capture group, so String#split also
# returns each matched separator as its own element between the paragraphs.
#
# Returns an Array of Strings.
#
# TODO: make this less primitive
def split_paragraphs
   paragraph_break = /(\n\n|\r\r|\r\n\r\n)/
   split(paragraph_break)
end
split_sentences() click to toggle source
# File lib/reclassifier/core_ext/string.rb, line 10
# Splits the receiver at every ".", "!" or "?" character.
#
# The pattern wraps the terminator in a capture group, so String#split also
# returns each matched punctuation mark as its own element after the text
# that preceded it.
#
# Returns an Array of Strings.
#
# TODO: make this less primitive
def split_sentences
   sentence_terminator = /(\.|\!|\?)/
   split(sentence_terminator)
end
summary( count=10, separator=" [...] " ) click to toggle source
# File lib/reclassifier/core_ext/string.rb, line 2
# Summarizes the receiver at sentence granularity.
#
# The string is broken up with #split_sentences and the pieces are handed
# to #perform_lsi, which picks +count+ of them and joins the picks with
# +separator+.
#
# count     - how many sentences to request from #perform_lsi (default: 10).
# separator - text placed between the selected sentences
#             (default: " [...] ").
#
# Returns whatever #perform_lsi returns (a joined String).
def summary(count = 10, separator = " [...] ")
   sentences = split_sentences
   perform_lsi(sentences, count, separator)
end

Private Instance Methods

perform_lsi(chunks, count, separator) click to toggle source
# File lib/reclassifier/core_ext/string.rb, line 20
# Indexes +chunks+ with Reclassifier::LSI and joins the chunks the index
# reports via #highest_relative_content.
#
# chunks    - collection of Strings (e.g. sentences or paragraphs). Entries
#             that are empty or a single word once stripped are not indexed.
# count     - passed straight to LSI#highest_relative_content.
# separator - String placed between the selected chunks in the result.
#
# Returns a String: the selected chunks, each stripped, joined by
# +separator+.
def perform_lsi(chunks, count, separator)
   # auto_rebuild is off so the index is built exactly once, explicitly,
   # after every chunk has been added.
   lsi = Reclassifier::LSI.new(:auto_rebuild => false)
   chunks.each do |chunk|
      stripped = chunk.strip
      next if stripped.empty? || stripped.split.size == 1
      lsi << chunk
   end
   lsi.build_index

   # NOTE(review): this used to run
   #   summaries.reject { |chunk| !summaries.include? chunk }
   # which filters a collection against itself and therefore never removes
   # anything; it has been dropped with no change in output. It may have been
   # meant to filter +chunks+ instead (restoring document order) - confirm
   # before changing the ordering.
   lsi.highest_relative_content(count).map(&:strip).join(separator)
end