module Summarizr
Constants
- VERSION
Public Class Methods
importance_index(words)
click to toggle source
# File lib/summarizr.rb, line 4
# Computes the occurrence threshold a word must exceed to be treated as a
# keyword. The threshold is the word count integer-divided by a divisor that
# grows with the size of the text.
#
# words - Array of word tokens (only its length is used).
#
# Returns an Integer for every input size. Texts of 500 words or more used to
# fall through the conditional and yield nil, which made the caller's
# `count > threshold` comparison raise; they now use a divisor of 80,
# continuing the 20 / 40 / 60 progression of the smaller buckets.
def self.importance_index(words)
  total = words.length
  divisor =
    case total
    when 0...50 then 20
    when 50...250 then 40
    when 250...500 then 60
    else 80
    end
  total / divisor
end
summarize(input)
click to toggle source
# File lib/summarizr.rb, line 14
# Produces an extractive summary of +input+: the sentences that mention the
# most keywords, joined with newlines.
#
# Steps:
# 1. Split the text into whitespace-separated words and count each one.
# 2. Keywords are the distinct words longer than 3 characters that occur more
#    often than the threshold from importance_index; if none qualify, words
#    occurring more than twice are used instead. Common stop words are dropped.
# 3. Each line of +input+ is a paragraph, split into sentences on ". ".
#    A sentence scores one point per keyword it contains
#    (case-insensitive), plus one point if it equals the paragraph's first
#    sentence.
# 4. Sentences scoring more than half of the best score (integer division)
#    are kept, in their original order.
#
# input - String of text to summarize.
#
# Returns a String with one selected sentence per line, or "" when the input
# contains no words or no sentences.
def self.summarize(input)
  all_words = input.split(' ')
  return '' if all_words.empty?

  # Count every word once up front instead of rescanning the array per word.
  occurrences = Hash.new(0)
  all_words.each { |word| occurrences[word] += 1 }

  # Non-destructive `uniq` on purpose: `uniq!` returns nil when nothing was
  # removed, which previously left `keywords` nil and crashed further down.
  frequent_words = lambda do |minimum|
    all_words.select { |word| occurrences[word] > minimum && word.length > 3 }.uniq
  end
  keywords = frequent_words.call(importance_index(all_words))
  keywords = frequent_words.call(2) if keywords.empty?

  # Word boundaries keep the filter from discarding words that merely
  # contain a stop word (e.g. "atmosphere" contains "here").
  stop_words = /\b(?:have|this|with|just|your|when|from|that|were|much|here|there|their|they)\b/i
  keywords = keywords.reject { |word| word =~ stop_words }

  # Keywords are raw tokens and may carry punctuation such as "(" or "?";
  # escape them so they are matched literally rather than as regex syntax.
  patterns = keywords.map { |word| Regexp.new(Regexp.escape(word), Regexp::IGNORECASE) }

  scores = {}
  input.split("\n").each do |paragraph|
    sentences = paragraph.split('. ')
    lead = sentences.first
    sentences.each do |sentence|
      bonus = sentence == lead ? 1 : 0
      scores[sentence] = bonus + patterns.count { |pattern| sentence =~ pattern }
    end
  end
  return '' if scores.empty?

  cutoff = scores.values.max / 2
  scores.reject { |_sentence, score| score <= cutoff }.keys.join("\n")
end