module BioDSL::Digest
Namespace for Digest.
Public Instance Methods
each_digest(pattern, cut_pos) { |subseq| ... }
click to toggle source
Method that yields each digestion product of a sequence in turn; without a block it returns an Enumerator over the products.
# File lib/BioDSL/seq/digest.rb, line 36
#
# Cut the sequence at every accepted match of a restriction pattern and yield
# the resulting fragments from left to right.
#
# The pattern is converted to a Regexp with +disambiguate+ and matched against
# the upper-cased sequence. For each match the cut position is the match start
# plus +cut_pos+. Each fragment is obtained with <tt>self[range]</tt> and is
# given a +seq_name+ of the form <tt>"name[first-last]"</tt>, where +first+ and
# +last+ are the 0-based, inclusive coordinates of the fragment in the parent
# sequence. The remainder after the last accepted cut is always yielded, so a
# sequence without any accepted cut is yielded whole.
#
# pattern - String with a restriction pattern; may contain ambiguity codes.
# cut_pos - Integer offset of the cut relative to the start of each match.
#
# Returns an Enumerator when no block is given.
# Raises whatever +disambiguate+ raises for residues it cannot translate.
def each_digest(pattern, cut_pos)
  return to_enum(:each_digest, pattern, cut_pos) unless block_given?

  regexp = disambiguate(pattern)
  offset = 0

  seq.upcase.scan(regexp) do
    pos = Regexp.last_match.begin(0) + cut_pos

    # Cuts before the start of the sequence or at/after +length - 2+ are
    # ignored. NOTE(review): the reason for the two-residue margin at the end
    # is not evident from this method - confirm before changing it.
    next unless pos >= 0 && pos < length - 2

    subseq = self[offset...pos]
    # End coordinate is absolute (pos - 1), matching the final fragment below.
    subseq.seq_name = "#{seq_name}[#{offset}-#{pos - 1}]"
    yield subseq

    # Advance only past accepted cuts: a rejected cut produced no fragment, so
    # the residues before it still belong to the next fragment.
    offset = pos
  end

  subseq = self[offset..-1]
  subseq.seq_name = "#{seq_name}[#{offset}-#{length - 1}]"
  yield subseq
end
Private Instance Methods
disambiguate(pattern)
click to toggle source
Method that returns a Regexp object for a restriction enzyme pattern, with each ambiguity code replaced by the equivalent regexp character class.
# File lib/BioDSL/seq/digest.rb, line 66
#
# Regexp fragment for every residue code accepted in a restriction pattern.
# Plain bases map to themselves, 'U' maps to 'T', and each ambiguity code maps
# to a character class of the bases it stands for. Built once and frozen so it
# is not re-allocated on every call.
DIGEST_AMBIGUITY = {
  'A' => 'A',
  'T' => 'T',
  'U' => 'T',
  'C' => 'C',
  'G' => 'G',
  'M' => '[AC]',
  'R' => '[AG]',
  'W' => '[AT]',
  'S' => '[CG]',
  'Y' => '[CT]',
  'K' => '[GT]',
  'V' => '[ACG]',
  'H' => '[ACT]',
  'D' => '[AGT]',
  'B' => '[CGT]',
  'N' => '[GATC]'
}.freeze

# Translate a restriction enzyme pattern into a Regexp by replacing every
# residue (case-insensitively) with its entry in DIGEST_AMBIGUITY.
#
# pattern - String with residues and/or ambiguity codes.
#
# Returns a Regexp; an empty pattern gives an empty Regexp.
# Raises DigestError for the first residue that has no entry in the table.
def disambiguate(pattern)
  fragments = pattern.upcase.each_char.map do |char|
    DIGEST_AMBIGUITY.fetch(char) do
      raise DigestError, "Could not disambiguate residue: #{char}"
    end
  end

  Regexp.new(fragments.join)
end