class TextQuantity
Public Instance Methods
number_of_unique_tokens(text_array)
click to toggle source
# File lib/automated_metareview/text_quantity.rb, line 5
#
# Counts the distinct tokens found across all strings in +text_array+.
#
# Each text is first passed through TextPreprocessing#contains_punct
# (presumably this strips punctuation -- confirm against that class), then
# split on whitespace. Tokens are compared case-insensitively, and any token
# for which WordnetBasedSimilarity#is_frequent_word returns a truthy value is
# ignored.
#
# Tokens are matched exactly. An earlier version tested membership with a
# substring search over a running string of seen tokens, so a new token such
# as "cat" was wrongly treated as already seen after "category".
#
# @param text_array [Array<String>] the texts to scan
# @return [Integer] number of unique, non-frequent tokens
def number_of_unique_tokens(text_array)
  frequency_checker = WordnetBasedSimilarity.new
  # Lower-cased tokens already counted; a hash gives exact-match, O(1) lookups.
  seen = {}

  text_array.each do |text|
    cleaned = TextPreprocessing.new.contains_punct(text)
    cleaned.split(' ').each do |token|
      word = token.downcase
      # Frequent words never contribute to the count.
      next if frequency_checker.is_frequent_word(word)

      seen[word] = true
    end
  end

  seen.size
end