module ClassifierReborn::Summarizer
Public Instance Methods
paragraph_summary(str, count = 1, separator = ' [...] ')
click to toggle source
# File lib/classifier-reborn/lsi/summarizer.rb, line 13
# Builds a summary of +str+ at paragraph granularity.
#
# The text is cut into paragraph chunks by split_paragraphs and those chunks
# are passed to perform_lsi; +count+ and +separator+ are forwarded unchanged.
# Returns whatever perform_lsi returns for those arguments.
def paragraph_summary(str, count = 1, separator = ' [...] ')
  paragraphs = split_paragraphs(str)
  perform_lsi(paragraphs, count, separator)
end
perform_lsi(chunks, count, separator)
click to toggle source
# File lib/classifier-reborn/lsi/summarizer.rb, line 25
# Indexes +chunks+ with a fresh ClassifierReborn::LSI and joins the entries
# returned by +highest_relative_content+ into a single string.
#
# chunks    - enumerable of strings (sentences or paragraphs).
# count     - passed straight to LSI#highest_relative_content.
# separator - string placed between the selected chunks in the result.
#
# Chunks that are blank, or that contain only a single whitespace-delimited
# word, are not added to the index. Each selected chunk is stripped of
# surrounding whitespace before joining.
#
# Returns the joined String.
def perform_lsi(chunks, count, separator)
  lsi = ClassifierReborn::LSI.new(auto_rebuild: false)

  chunks.each do |chunk|
    stripped = chunk.strip
    # Skip blank chunks and one-word chunks (this also drops the bare
    # delimiter tokens produced by the capture-group splits).
    next if stripped.empty? || stripped.split.size == 1

    lsi << chunk
  end

  # auto_rebuild is off, so the index is built once after all chunks are added.
  lsi.build_index

  # The previous implementation additionally ran
  # `summaries.reject { |c| !summaries.include?(c) }`, i.e. it filtered the
  # array against itself. That never removes an element, so it is omitted.
  lsi.highest_relative_content(count).map(&:strip).join(separator)
end
split_paragraphs(str)
click to toggle source
# File lib/classifier-reborn/lsi/summarizer.rb, line 21
# Cuts +str+ into paragraphs at a doubled line break: two "\n", two "\r",
# or two "\r\n" in a row.
#
# The pattern is wrapped in a capture group, so String#split also returns
# each matched line-break sequence as its own array element between the
# paragraphs.
#
# Returns an Array of Strings.
def split_paragraphs(str)
  # TODO: make this less primitive
  paragraph_break = /((?:\r\n){2}|\n{2}|\r{2})/
  str.split(paragraph_break)
end
split_sentences(str)
click to toggle source
# File lib/classifier-reborn/lsi/summarizer.rb, line 17
# Cuts +str+ into sentence-like pieces at every ".", "!" or "?".
#
# The pattern is wrapped in a capture group, so String#split also returns
# each punctuation mark as its own array element between the pieces.
#
# Returns an Array of Strings.
def split_sentences(str)
  # TODO: make this less primitive
  sentence_end = /([.!?])/
  str.split(sentence_end)
end
summary(str, count = 10, separator = ' [...] ')
click to toggle source
# File lib/classifier-reborn/lsi/summarizer.rb, line 9
# Builds a summary of +str+ at sentence granularity.
#
# The text is cut into sentence chunks by split_sentences and those chunks
# are passed to perform_lsi; +count+ and +separator+ are forwarded unchanged.
# Returns whatever perform_lsi returns for those arguments.
def summary(str, count = 10, separator = ' [...] ')
  sentences = split_sentences(str)
  perform_lsi(sentences, count, separator)
end