module BioDSL::Digest

Namespace for Digest.

Public Instance Methods

each_digest(pattern, cut_pos) { |subseq| ... } click to toggle source

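Iterates over the digestion products of a sequence: the sequence is cut at each match of the given restriction pattern and every resulting subsequence is yielded to the block. Returns an enumerator when no block is given.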

# File lib/BioDSL/seq/digest.rb, line 36
# Yield each fragment produced by digesting the sequence with a restriction
# pattern.
#
# The upcased sequence is scanned for +pattern+ and cut +cut_pos+ residues
# after the start of every match. Fragments are yielded in order, each named
# "<seq_name>[<first>-<last>]" with 0-based inclusive coordinates into the
# parent sequence. The remainder after the last accepted cut is always
# yielded as the final fragment, so the fragments tile the whole sequence.
#
# pattern - restriction pattern String; converted to a Regexp by
#           disambiguate.
# cut_pos - Integer cut offset relative to the start of each match (may be
#           negative).
#
# Returns an Enumerator when no block is given.
def each_digest(pattern, cut_pos)
  return to_enum(:each_digest, pattern, cut_pos) unless block_given?

  pattern = disambiguate(pattern)
  offset  = 0

  seq.upcase.scan(pattern) do
    pos = Regexp.last_match.pre_match.length + cut_pos

    # Skip cuts that land before the sequence start or that would leave
    # fewer than three residues after the cut. The offset is deliberately
    # NOT advanced for a skipped cut, so no residues are dropped and the
    # next slice never starts from a negative index.
    next unless pos >= 0 && pos < length - 2

    subseq = self[offset...pos]
    # End coordinate is absolute (pos - 1), matching the final fragment.
    subseq.seq_name = "#{seq_name}[#{offset}-#{pos - 1}]"

    yield subseq

    offset = pos
  end

  # Whatever remains after the last accepted cut (the entire sequence when
  # nothing was cut).
  subseq = self[offset..-1]
  subseq.seq_name = "#{seq_name}[#{offset}-#{length - 1}]"

  yield subseq
end

Private Instance Methods

disambiguate(pattern) click to toggle source

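Returns a Regexp built from a restriction enzyme pattern, with each nucleotide ambiguity code replaced by the equivalent character class (for example, N becomes [GATC]). Raises DigestError if the pattern contains an unrecognized residue.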

# File lib/BioDSL/seq/digest.rb, line 66
def disambiguate(pattern)
  ambiguity = {
    'A' => 'A',
    'T' => 'T',
    'U' => 'T',
    'C' => 'C',
    'G' => 'G',
    'M' => '[AC]',
    'R' => '[AG]',
    'W' => '[AT]',
    'S' => '[CG]',
    'Y' => '[CT]',
    'K' => '[GT]',
    'V' => '[ACG]',
    'H' => '[ACT]',
    'D' => '[AGT]',
    'B' => '[CGT]',
    'N' => '[GATC]'
  }

  new_pattern = ''

  pattern.upcase.each_char do |char|
    if ambiguity[char]
      new_pattern << ambiguity[char]
    else
      fail DigestError, "Could not disambiguate residue: #{char}"
    end
  end

  Regexp.new(new_pattern)
end