module DocumentSimilarity
Constants
- VERSION
Public Class Methods
similarity(document, another_document)
click to toggle source
# File lib/document_similarity.rb, line 5
#
# Computes the cosine similarity between two documents.
#
# Each document is lower-cased and split on whitespace. Both word lists are
# then turned into relative term-frequency vectors over the combined
# vocabulary, and the cosine of the angle between those vectors is returned.
#
# document         - String holding the first document.
# another_document - String holding the second document.
#
# Returns a Float. Returns 0.0 when either document contains no words,
# because a zero-length vector has no direction to compare.
def self.similarity(document, another_document)
  document_words = document.downcase.split(' ')
  another_document_words = another_document.downcase.split(' ')

  # A document without words yields a vector whose norm is 0; dividing by
  # that norm would produce NaN, so bail out before building any vectors.
  return 0.0 if document_words.empty? || another_document_words.empty?

  all_words = (document_words + another_document_words).uniq

  # The locals are deliberately not named `document_vector`, so they do not
  # shadow the helper method of the same name.
  first_vector = document_vector(document_words, all_words)
  second_vector = document_vector(another_document_words, all_words)

  first_vector.inner_product(second_vector) / (first_vector.norm * second_vector.norm)
end
Private Class Methods
document_vector(document_words, all_words)
click to toggle source
# File lib/document_similarity.rb, line 17
#
# Builds the relative term-frequency vector of a document over a vocabulary.
#
# document_words - Array of the words in the document (duplicates allowed).
# all_words      - Array of vocabulary words; fixes the order and length of
#                  the resulting vector.
#
# Returns a Vector with one Float per entry of all_words: the number of times
# that word occurs in document_words divided by the total number of words in
# document_words. An empty document_words yields a vector of zeros.
def self.document_vector(document_words, all_words)
  # Without this guard an empty document would compute 0 / 0.0 for every
  # vocabulary word and fill the vector with NaN.
  return Vector.elements(Array.new(all_words.size, 0.0)) if document_words.empty?

  # Count every word in a single pass so each vocabulary lookup is a hash
  # read instead of a full rescan of the document.
  occurrences = Hash.new(0)
  document_words.each { |document_word| occurrences[document_word] += 1 }

  total = document_words.size.to_f
  Vector.elements(all_words.map { |word| occurrences[word] / total })
end