class SimilarityTree::SimilarityTree

Constructs a hierarchy of nodes based on a specified root and the similarity “scores” between nodes. Each nodes is placed next to the node to which it is most similar; as between two nodes, the node most similar to the root is placed closest to the root.

Public Class Methods

new(root_id, similarity_matrix, score_threshold = 0) click to toggle source

initialize/build the tree hierarchy from an existing similarity matrix

# File lib/similarity_tree/similarity_tree.rb, line 7
def initialize(root_id, similarity_matrix, score_threshold = 0)
  @nodes = similarity_matrix.map {|key, row| Node.new(key, 0)}
  @root = @nodes.find {|n| n.id == root_id}
  @root.diff_score = nil
  @similarity_matrix = similarity_matrix
  @score_threshold = score_threshold
end

Public Instance Methods

build() click to toggle source

build the tree and return the root node

# File lib/similarity_tree/similarity_tree.rb, line 16
def build
  build_tree
  @root
end

Private Instance Methods

build_tree() click to toggle source
# File lib/similarity_tree/similarity_tree.rb, line 22
def build_tree
  tree = @root
  flat = [@root]

  # for each non-root node
  @nodes.delete_if{|n| n == @root}.map do |n|
    # find the best match to the nodes already in the tree
    closest_diff_score = 0
    closest = nil
    flat.each do |m|
      diff_score = @similarity_matrix[n.id][m.id]
      if closest.nil? || (diff_score > closest_diff_score)
        closest_diff_score = diff_score
        closest = m
      end
    end

    # if the closest match is the root node, or if the closest match's diff score with it's parent is stronger
    # than between the present node and that parent, add as a child of the match
    if (closest == @root) || (closest.diff_score >= @similarity_matrix[n.id][closest.parent.id])
      n.parent = closest
      closest.children << n
      n.diff_score = @similarity_matrix[n.id][closest.id]
      # else, if the new node is more similar to the parent, rotate so that the existing node becomes the child
    else
      # place children with the closest matching of the two
      closest.children.dup.each do |child|
        if @similarity_matrix[child.id][n.id] > child.diff_score
          child.parent = n
          closest.children.delete_if{|child_i| child_i == child }
          n.children << child
          child.diff_score = @similarity_matrix[child.id][n.id]
        end
      end

      # connect the new node to the parent
      n.parent = closest.parent
      n.parent.children << n
      n.diff_score = @similarity_matrix[n.id][n.parent.id]

      # add the existing node as a child
      closest.parent = n
      n.parent.children.delete_if{|child_i| child_i == closest}
      n.children << closest
      closest.diff_score = @similarity_matrix[closest.id][n.id]
    end

    flat << n
  end
  prune(flat)
end
prune(nodes) click to toggle source

prune away nodes that don’t meet the configured score threshold

# File lib/similarity_tree/similarity_tree.rb, line 75
def prune(nodes)
  nodes.each do |node|
    node.parent.children.reject!{|n| n == node} if (node != @root) && (node.diff_score < @score_threshold)
  end
end