class BioDSL::Mummer

Class for executing MUMmer and parsing MUMmer results.

Constants

Match

Public Class Methods

each_mem(seq1, seq2, options = {}) { |mem| ... } click to toggle source

@param seq1 [BioDSL::Seq] Sequence 1. @param seq2 [BioPeices::Seq] Sequence 2. @param options [Hash] Options hash.

@yield [Mummer::Match] A match object @return [Enumerable] An Enumerable

# File lib/BioDSL/mummer.rb, line 42
def self.each_mem(seq1, seq2, options = {})
  mummer = new(seq1, seq2, options)

  if block_given?
    mummer.each_mem { |mem| yield mem }
  else
    mummer.each_mem
  end
end
new(seq1, seq2, options = {}) click to toggle source

Constructor for Mummer class.

@param seq1 [BioDSL::Seq] Sequence 1. @param seq2 [BioPeices::Seq] Sequence 2. @param options [Hash] Options hash.

@return [Mummer] Class instance.

# File lib/BioDSL/mummer.rb, line 59
def initialize(seq1, seq2, options = {})
  @seq1    = seq1
  @seq2    = seq2
  @options = options
  @command = []
  @q_id    = nil
  @dir     = nil

  default_options
  check_options
end

Public Instance Methods

each_mem() { |match| ... } click to toggle source

@yield [Mummer::Match] A match object @return [Enumerable] An Enumerable

# File lib/BioDSL/mummer.rb, line 73
def each_mem
  return to_enum :each_mem unless block_given?

  TmpDir.create('in1', 'in2', 'out') do |file_in1, file_in2, file_out|
    BioDSL::Fasta.open(file_in1, 'w') { |io| io.puts @seq1.to_fasta }
    BioDSL::Fasta.open(file_in2, 'w') { |io| io.puts @seq2.to_fasta }

    execute(file_in1, file_in2, file_out)

    File.open(file_out) do |io|
      while (match = get_match(io))
        yield match
      end
    end
  end
end

Private Instance Methods

check_direction() click to toggle source

Check that the value of :direction is OK.

@raise [BioDSL::MummerError] on bad direction.

# File lib/BioDSL/mummer.rb, line 149
def check_direction
  return if @options[:direction] == :forward ||
            @options[:direction] == :reverse ||
            @options[:direction] == :both

  fail MummerError, "Bad direction: #{@options[:direction]}"
end
check_length_min_type() click to toggle source

Check that the type of :length_min is OK.

@raise [BioDSL::MummerError] on bad length_min type.

# File lib/BioDSL/mummer.rb, line 140
def check_length_min_type
  return if @options[:length_min].class == Fixnum

  fail MummerError, "Bad length_min type: #{@options[:length_min].class}"
end
check_length_min_value() click to toggle source

Check the that the value of :length_min is OK.

@raise [BioDSL::MummerError] on bad length_min value.

# File lib/BioDSL/mummer.rb, line 131
def check_length_min_value
  return if @options[:length_min] > 0

  fail MummerError, "Bad length_min: #{@options[:length_min]}"
end
check_options() click to toggle source

Check that the options are OK

# File lib/BioDSL/mummer.rb, line 122
def check_options
  check_length_min_value
  check_length_min_type
  check_direction
end
compile_command(file_in1, file_in2, file_out) click to toggle source

Compile a command for execution of mummer.

@param file_in1 [String] Path to sequence filen. @param file_in1 [String] Path to sequence filen. @param file_out [String] Path to output file.

@return [String] Command string.

# File lib/BioDSL/mummer.rb, line 185
def compile_command(file_in1, file_in2, file_out)
  @command << 'mummer'
  @command << '-c' # report position of revcomp match relative to query seq.
  @command << '-L' # show length of query seq in header.
  @command << '-F' # force 4-column output.
  @command << "-l #{@options[:length_min]}"
  @command << '-n' # nucleotides only [atcg].

  case @options[:direction]
  when :reverse then @command << '-r' # only compute reverse matches.
  when :both    then @command << '-b' # compute forward and reverse matches.
  end

  @command << file_in1
  @command << file_in2
  @command << "> #{file_out}"
  @command << '2>&1' unless BioDSL.verbose

  @command.join(' ')
end
default_options() click to toggle source

Set some sensible default options.

# File lib/BioDSL/mummer.rb, line 158
def default_options
  @options[:length_min] ||= 20
  @options[:direction]  ||= :both
end
execute(file_in1, file_in2, file_out) click to toggle source

Execute MUMmer.

@param file_in1 [String] Path to sequence filen. @param file_in1 [String] Path to sequence filen. @param file_out [String] Path to output file.

# File lib/BioDSL/mummer.rb, line 168
def execute(file_in1, file_in2, file_out)
  cmd = compile_command(file_in1, file_in2, file_out)

  $stderr.puts "Running command: #{cmd}" if BioDSL.verbose

  system(cmd)

  fail "Error running command: #{cmd}" unless $CHILD_STATUS.success?
end
get_match(io) click to toggle source

Get a match if possible.

@param io [IO] IO stream.

@return [Match, nil] match or nil whether a match was found.

# File lib/BioDSL/mummer.rb, line 97
def get_match(io)
  io.each do |line|
    line.chomp!

    case line
    when /^> (\S+)\s+Reverse\s+Len = \d+$/
      @q_id  = Regexp.last_match(1)
      @dir   = 'reverse'
    when /^> (\S+)\s+Len = \d+$/
      @q_id  = Regexp.last_match(1)
      @dir   = 'forward'
    when /^\s*(.\S+)\s+(\d+)\s+(\d+)\s+(\d+)$/
      s_id    = Regexp.last_match(1)
      s_beg   = Regexp.last_match(2).to_i - 1
      q_beg   = Regexp.last_match(3).to_i - 1
      hit_len = Regexp.last_match(4).to_i

      return Match.new(@q_id, s_id, @dir, s_beg, q_beg, hit_len)
    end
  end

  nil
end