class NeedlemanWunschAligner

Finds the optimal alignment of two sequences using the Needleman-Wunsch algorithm. This basic implementation works with any Ruby object and just looks at object identity for the scoring algorithm.

See ExampleParagraphAndSentenceAligner for an example of a more sophisticated scoring algorithm.

Constants

VERSION

Public Class Methods

new(left_seq, top_seq, options={}) click to toggle source

@param left_seq [Array<Object>] sequence drawn at left of matrix @param top_seq [Array<Object>] sequence drawn at top of matrix @param options [Hash, optional] can be used to provide state to the aligner

which can be used, e.g., when computing the score.
One example use case is to pass in the maximum expected
offset, and then not compute the entire score matrix, but
only a band around the diagonal, wide enough to handle the
maximum expected offset.
# File lib/needleman_wunsch_aligner.rb, line 21
def initialize(left_seq, top_seq, options={})
  @left_seq = left_seq
  @top_seq = top_seq
  @options = options
end

Public Instance Methods

compute_score(left_el, top_el, row_index, col_index) click to toggle source

This is a basic implementation of the scoring algorithm. See ExampleParagraphAndSentenceAligner for a more complex scoring function. NOTE: You can use row_index and col_index to improve performance if you know the maximum offset between left_el and top_el. @param left_el [Object] @param top_el [Object] @param row_index [Integer] zero based row index, origin is at top. @param col_index [Integer] zero based column index, origin is to the left. @return [Numeric]

# File lib/needleman_wunsch_aligner.rb, line 44
def compute_score(left_el, top_el, row_index, col_index)
  left_el == top_el ? 1 : -3
end
default_gap_penalty() click to toggle source

Returns the default penalty for a gap. @return [Numeric]

# File lib/needleman_wunsch_aligner.rb, line 50
def default_gap_penalty
  -1
end
element_for_inspection_display(element, col_width = nil) click to toggle source

Transforms element to an object that can be used for display when inspecting an alignment @params element [Object] @return [Object]

# File lib/needleman_wunsch_aligner.rb, line 130
def element_for_inspection_display(element, col_width = nil)
  r = case element
  when String
    element
  else
    element.inspect
  end
  col_width ? r[0...col_width] : r
end
elements_are_equal_for_inspection(top_el, left_el) click to toggle source

Returns true if top_el and left_el are considered equal for inspection purposes @params top_el [Object] @params left_el [Object] @return [Boolean]

# File lib/needleman_wunsch_aligner.rb, line 145
def elements_are_equal_for_inspection(top_el, left_el)
  top_el == left_el
end
gap_indicator() click to toggle source

Returns a sequence element to indicate a gap. Needs to be compatible with other sequence elements and your scoring function. @return [Object]

# File lib/needleman_wunsch_aligner.rb, line 57
def gap_indicator
  nil
end
get_optimal_alignment() click to toggle source

Returns two arrays that represent the optimal alignment.

# File lib/needleman_wunsch_aligner.rb, line 28
def get_optimal_alignment
  @get_optimal_alignment ||= (
    construct_score_matrix_and_traceback_matrix
    compute_optimal_alignment
  )
end
inspect_alignment(col_width = 20) click to toggle source

Returns a string representation of the optimal alignment in two columns. @param col_width [Integer, optional] max width of each col in chars @return [String]

# File lib/needleman_wunsch_aligner.rb, line 64
def inspect_alignment(col_width = 20)
  aligned_left_seq, aligned_top_seq = get_optimal_alignment
  s = []
  aligned_left_seq.each_with_index do |left_el, idx|
    top_el = aligned_top_seq[idx]
    delimiter = if elements_are_equal_for_inspection(top_el, left_el)
      '=' # match
    elsif gap_indicator == top_el
      '-' # delete
    elsif gap_indicator == left_el
      '+' # insert
    else
      '!' # mismatch
    end
    s << [
      element_for_inspection_display(left_el, col_width).rjust(col_width),
      element_for_inspection_display(top_el, col_width).ljust(col_width),
    ].join("  #{ delimiter }  ")
  end
  s.join("\n")
end
inspect_matrix(which_matrix, col_width = 3) click to toggle source

Returns string representation of either the score or the traceback matrix. @param which_matrix [Symbol] one of :traceback or :score @param col_width [Integer, optional], defaults to 3 @return [String]

# File lib/needleman_wunsch_aligner.rb, line 90
def inspect_matrix(which_matrix, col_width = 3)
  get_optimal_alignment  # make sure we have computed the matrices
  the_matrix = case which_matrix
  when :traceback
    @traceback_matrix
  when :score
    @score_matrix
  else
    raise "Handle this: #{ which_matrix.inspect }"
  end

  s = ''
  s << 'left_seq = ' + @left_seq.join + "\n"
  s << 'top_seq = ' + @top_seq.join + "\n"
  s <<  "\n"
  # Header row
  s << ' ' * 2 * col_width
  @top_seq.each_index { |e| s << @top_seq[e].to_s.rjust(col_width) }
  s << "\n"

  traverse_score_matrix do |row, col|
    if 0 == col and 0 == row
      # first column in first row
      s << ' '.rjust(col_width)
    elsif 0 == col
      # first col in subsequent rows
      s << @left_seq[row - 1].to_s.rjust(col_width)
    end
    # subsequent cells
    s << the_matrix[row][col].to_s.rjust(col_width)
    # finalize row
    s << "\n" if col == the_matrix[row].length - 1
  end
  s
end

Protected Instance Methods

compute_optimal_alignment() click to toggle source
# File lib/needleman_wunsch_aligner.rb, line 187
def compute_optimal_alignment
  row = @score_matrix.length-1
  col = @score_matrix[0].length-1
  left = Array.new
  top = Array.new
  while row > 0 or col > 0
    case @traceback_matrix[row][col]
    when '⬉'
      left.push(@left_seq[row-1])
      top.push(@top_seq[col-1])
      row -= 1
      col -= 1
    when '←'
      left.push(gap_indicator)
      top.push @top_seq[col-1]
      col -= 1
    when '↑'
      left.push @left_seq[row-1]
      top.push(gap_indicator)
      row -= 1
    else
      raise "Handle this"
    end
  end
  [left.reverse, top.reverse]
end
construct_score_matrix_and_traceback_matrix() click to toggle source
# File lib/needleman_wunsch_aligner.rb, line 151
def construct_score_matrix_and_traceback_matrix
  initialize_score_matrix_and_traceback_matrix
  traverse_score_matrix do |row, col|
    if 0 == row && 0 == col # top left cell
      @score_matrix[0][0] = 0
      @traceback_matrix[0][0] = 'x'
    elsif 0 == row # first row
      @score_matrix[0][col] = col * default_gap_penalty
      @traceback_matrix[0][col] = '←'
    elsif 0 == col # first col
      @score_matrix[row][0] = row * default_gap_penalty
      @traceback_matrix[row][0] = '↑'
    else # other cells
      # compute scores
      from_top = @score_matrix[row-1][col] + default_gap_penalty
      from_left = @score_matrix[row][col-1] + default_gap_penalty
      # @left_seq and @top_seq are off by 1 because we added cells for gaps in the matrix
      score = compute_score(
        @left_seq[row-1],
        @top_seq[col-1],
        row-1,
        col-1
      )
      from_top_left = @score_matrix[row-1][col-1] + score

      # find max score and direction
      max, direction = [from_top_left, '⬉']
      max, direction = [from_top, '↑']  if from_top > max
      max, direction = [from_left, '←']  if from_left > max

      @score_matrix[row][col] = max
      @traceback_matrix[row][col] = direction
    end
  end
end
initialize_score_matrix_and_traceback_matrix() click to toggle source
# File lib/needleman_wunsch_aligner.rb, line 222
def initialize_score_matrix_and_traceback_matrix
  @score_matrix = Array.new(@left_seq.length + 1)
  @traceback_matrix = Array.new(@left_seq.length + 1)

  @score_matrix.each_index do |row|
    @score_matrix[row] = Array.new(@top_seq.length + 1)
    @traceback_matrix[row] = Array.new(@top_seq.length + 1)
  end
end
traverse_score_matrix() { |row, col| ... } click to toggle source
# File lib/needleman_wunsch_aligner.rb, line 214
def traverse_score_matrix
  @score_matrix.each_index do |row|
    @score_matrix[row].each_index do |col|
      yield(row, col)
    end
  end
end