class Summary

Public Class Methods

get_dummary_ratio(content, summary) click to toggle source
# File lib/simple_summary.rb, line 4
# Percentage by which the summary is shorter than the original content.
#
# content - the original text (String). A Numeric is also accepted and is
#           taken to be the original length, since the argument used to be
#           coerced with to_f only.
# summary - the summarised text (String).
#
# Returns a Float: 100 - 100 * (summary length / content length), or 0.0
# when the content length is zero, so the division can never produce
# NaN or Infinity.
def self.get_dummary_ratio(content, summary)
  # String#to_f yields 0.0 for ordinary prose, so the length has to be
  # measured explicitly; anything without #length is treated as a number.
  content_length = (content.respond_to?(:length) ? content.length : content).to_f
  return 0.0 if content_length.zero?

  100 - (100 * (summary.length.to_f / content_length))
end
squash(content) click to toggle source
# File lib/simple_summary.rb, line 8
# Entry point: summarise +content+.
#
# Ranks the sentences of the whole text first, then hands the text and the
# ranking to get_summary, whose result is returned unchanged.
def self.squash(content)
  get_summary(content, get_senteces_ranks(content))
end

Private Class Methods

format_sentence(sentence) click to toggle source
# File lib/simple_summary.rb, line 27
# Normalise a sentence so it can be used as a lookup key: every character
# that is not a digit, a letter a-z (case-insensitively) or a space is
# removed. Returns a new String; the argument is not modified.
def self.format_sentence(sentence)
  unwanted = /[^0-9a-z ]/i
  sentence.gsub(unwanted, '')
end
get_best_sentence(paragraph, sentences_dic) click to toggle source
# File lib/simple_summary.rb, line 54
# Pick the highest-ranked sentence of a paragraph.
#
# paragraph     - String holding one paragraph of the text.
# sentences_dic - Hash mapping a formatted sentence (see format_sentence)
#                 to its numeric score.
#
# Returns the original (unformatted) sentence with the largest score that
# is strictly greater than 0; on a tie the earliest sentence wins.
# Returns "" when the paragraph has two sentences or fewer, or when no
# sentence scores above 0.
def self.get_best_sentence(paragraph, sentences_dic)
  # Split the paragraph into sentences
  sentences = split_content_to_sentences(paragraph)

  # Ignore short paragraphs
  return "" unless sentences.count > 2

  best_sentence = ""
  max_value = 0
  sentences.each do |sentence|
    # A sentence without an entry counts as 0 instead of raising on
    # `nil > max_value`.
    value = sentences_dic[format_sentence(sentence)] || 0
    next unless value > max_value

    max_value = value
    best_sentence = sentence
  end

  best_sentence
end
get_senteces_ranks(content) click to toggle source
# File lib/simple_summary.rb, line 31
# Score every sentence of +content+ against all the other sentences.
#
# content - String holding the full text.
#
# Returns a Hash whose keys are the formatted sentences (format_sentence)
# and whose values are the sum of sentences_intersection between that
# sentence and every other sentence, added in sentence order. Sentences
# that format to the same key share one entry; the later one overwrites
# the earlier. A text with a single sentence gets an entry with score 0,
# and an empty text yields an empty Hash.
def self.get_senteces_ranks(content)
  sentences = split_content_to_sentences(content)

  sentences_dic = {}
  sentences.each_with_index do |sentence, i|
    score = 0
    sentences.each_with_index do |other, j|
      # A sentence is never compared with itself.
      score += sentences_intersection(sentence, other) unless i == j
    end
    # Stored once per sentence, after the inner loop, so a lone sentence
    # is recorded as well.
    sentences_dic[format_sentence(sentence)] = score
  end
  sentences_dic
end
get_summary(content, sentences_dic) click to toggle source
# File lib/simple_summary.rb, line 76
# Build the summary text from the best sentence of each paragraph.
#
# content       - String holding the full text.
# sentences_dic - Hash of formatted sentence => score, passed through to
#                 get_best_sentence.
#
# Returns a String with one chosen sentence per line, in paragraph order.
# Paragraphs for which get_best_sentence returns nothing but whitespace
# contribute no line at all.
def self.get_summary(content, sentences_dic)
  # Split the content into paragraphs and take the best sentence of each
  split_content_to_paragraphs(content)
    .map { |paragraph| get_best_sentence(paragraph, sentences_dic).strip }
    # An empty String is truthy in Ruby, so emptiness must be tested
    # explicitly to keep blank lines out of the summary.
    .reject(&:empty?)
    .join("\n")
end
sentences_intersection(sent1, sent2) click to toggle source
# File lib/simple_summary.rb, line 23
# Similarity of two sentences, measured on characters.
#
# Counts the distinct characters (newlines excluded, because /./ does not
# match them) that occur in both sentences and divides that count by the
# average length of the two sentences.
#
# sent1, sent2 - the Strings to compare.
#
# Returns a Float; 0.0 when both sentences are empty, where the average
# length is zero and the division would otherwise give NaN.
def self.sentences_intersection(sent1, sent2)
  average_length = (sent1.length + sent2.length) / 2.0
  return 0.0 if average_length.zero?

  shared = sent1.scan(/./) & sent2.scan(/./)
  shared.length / average_length
end
split_content_to_paragraphs(content) click to toggle source
# File lib/simple_summary.rb, line 19
# Break the text into paragraphs; a paragraph boundary is two consecutive
# newline characters. Returns an Array of Strings.
def self.split_content_to_paragraphs(content)
  paragraph_break = "\n\n"
  content.split(paragraph_break)
end
split_content_to_sentences(content) click to toggle source
# File lib/simple_summary.rb, line 15
# Break the text into sentences. Every newline is first turned into a
# sentence terminator (". "), then the text is cut at each ". ".
# Returns an Array of Strings; consecutive terminators yield empty
# entries in the middle of the list.
def self.split_content_to_sentences(content)
  terminator = ". "
  single_line = content.gsub("\n", terminator)
  single_line.split(terminator)
end