class PredictClass

Public Instance Methods

compare_edges(e1, e2, wordnet) click to toggle source
# File lib/automated_metareview/predict_class.rb, line 89
# Scores how closely edge +e1+ matches edge +e2+.
#
# Two alignments are evaluated and the larger average is returned:
# * direct  - e1's in-vertex against e2's in-vertex, out against out
# * crossed - e1's in-vertex against e2's out-vertex, out against in
#
# Every vertex pair is compared regardless of its POS tag; an earlier variant
# that skipped noun ("NN") vertices is not active.
#
# e1, e2  - edge objects responding to +in_vertex+ and +out_vertex+
# wordnet - object responding to +compare_strings(vertex_a, vertex_b, speller)+
#
# Returns the higher of the two averaged scores (crossed wins ties).
def compare_edges(e1, e2, wordnet)
  # A fresh Aspell speller is configured on each call and handed to wordnet.
  spell_checker = Aspell.new("en_US")
  spell_checker.suggestion_mode = Aspell::NORMAL

  # Direct alignment: in <-> in, out <-> out.
  in_to_in = wordnet.compare_strings(e1.in_vertex, e2.in_vertex, spell_checker)
  out_to_out = wordnet.compare_strings(e1.out_vertex, e2.out_vertex, spell_checker)
  direct_score = (in_to_in + out_to_out) / 2.0

  # Crossed alignment: in <-> out, out <-> in.
  in_to_out = wordnet.compare_strings(e1.in_vertex, e2.out_vertex, spell_checker)
  out_to_in = wordnet.compare_strings(e1.out_vertex, e2.in_vertex, spell_checker)
  crossed_score = (in_to_out + out_to_in) / 2.0

  direct_score > crossed_score ? direct_score : crossed_score
end
compare_review_with_patterns(single_edges, single_patterns, wordnet) click to toggle source
# File lib/automated_metareview/predict_class.rb, line 47
# Computes the average best-match score of a review's edges against one class
# of patterns.
#
# For every non-nil edge, the highest compare_edges score over all non-nil
# patterns is stored in the edge's +average_match+. Edges whose best score is
# zero are left out of the average.
#
# single_edges    - collection of edge objects (nil entries are skipped);
#                   each must respond to +average_match+ / +average_match=+
# single_patterns - collection of pattern edges (nil entries are skipped)
# wordnet         - similarity helper forwarded to compare_edges
#
# Returns the sum of non-zero best scores divided by the number of edges that
# contributed (0.0 when no edge matched anything).
def compare_review_with_patterns(single_edges, single_patterns, wordnet)
  # Clear scores left over from matching against a previous class.
  single_edges.each do |edge|
    edge.average_match = 0 unless edge.nil?
  end

  score_total = 0.0
  scored_edges = 0

  single_edges.each do |edge|
    next if edge.nil?

    best_score = 0
    single_patterns.each do |pattern|
      next if pattern.nil?

      score = compare_edges(edge, pattern, wordnet)
      best_score = score if score > best_score
    end
    edge.average_match = best_score

    # Only edges that matched something count towards the class average.
    next if edge.average_match == 0.0

    score_total += edge.average_match
    scored_edges += 1
  end

  # Avoid dividing by zero when nothing matched.
  scored_edges = 1 if scored_edges == 0

  score_total / scored_edges
end
predict_classes(pos_tagger, core_NLP_tagger, review_text, review_graph, pattern_files_array, num_classes) click to toggle source
Identifies the probabilities of a review belonging to each of the three classes.
Returns an array of probabilities (length = num_classes).

predicting the review’s class

# File lib/automated_metareview/predict_class.rb, line 10
# Scores a review graph against the patterns of each content class.
#
# pos_tagger          - tagger forwarded to TextPreprocessing#read_patterns
# core_NLP_tagger     - not used by this method
# review_text         - not used by this method
# review_graph        - object whose +edges+ are matched against the patterns
# pattern_files_array - pattern file names, one per class, in class order
# num_classes         - number of classes to score
#
# Returns an array of length +num_classes+ holding one normalized score per
# class, in the same order as +pattern_files_array+.
def predict_classes(pos_tagger, core_NLP_tagger, review_text, review_graph, pattern_files_array, num_classes)
  # Gather the pattern file name for each class.
  pattern_paths = []
  pattern_files_array.each { |path| pattern_paths << path }

  # read_patterns parses patterns written in the 'X = Y' format.
  preprocessor = TextPreprocessing.new
  patterns_by_class = Array.new(num_classes) do |class_index|
    preprocessor.read_patterns(pattern_paths[class_index], pos_tagger)
  end

  similarity = WordnetBasedSimilarity.new
  review_edges = review_graph.edges

  # Dividing by 6 is meant to bring each score into [0, 1]
  # (presumably 6 is the maximum match value - TODO confirm).
  (0..num_classes - 1).map do |class_index|
    compare_review_with_patterns(review_edges, patterns_by_class[class_index], similarity) / 6.0
  end
end