class Bio::Cigar
Attributes
cigar_string[RW]
Public Class Methods
new(cigar_string)
click to toggle source
# File lib/bio-cigar/cigar.rb, line 5
#
# Stores the CIGAR string in @cigar_string without copying or validating it.
# Parsing happens later, when each_alignment_chunk walks the string.
#
# cigar_string:: the CIGAR text, e.g. '1S3M'
def initialize(cigar_string)
  @cigar_string = cigar_string
end
Public Instance Methods
each_alignment_chunk() { |type, count| ... }
click to toggle source
Yield the type and count for each different part of the cigar string e.g.
cigar = Bio::Cigar.new('1S3M')
cigar.each_alignment_chunk do |type, count|
  type  #=> first 'S', second 'M' (as strings)
  count #=> first 1, second 3 (as integers)
end
# File lib/bio-cigar/cigar.rb, line 69
#
# Walks @cigar_string from left to right and yields every operation as
# (type, count): type is the one-letter operation as a String (one of
# M, I, D, N, S, H, P, = or X) and count is its length as an Integer.
#
# Returns an Enumerator over [type, count] pairs when no block is given.
#
# Raises RuntimeError when any part of the string is not a
# <digits><operation> pair. Operations that precede the unparsable text
# have already been yielded by the time the error is raised.
def each_alignment_chunk
  return enum_for(:each_alignment_chunk) unless block_given?

  position = 0
  # \G pins every match to +position+, so unparsable text can never be
  # skipped over, and newlines get no special treatment (a per-line `^`
  # anchor would happily restart matching on the next line).
  while (chunk = @cigar_string.match(/\G(\d+)([MIDNSHP=X])/, position))
    yield chunk[2], chunk[1].to_i
    position = chunk.end(0)
  end

  # Anything after the last well-formed operation is a parse failure.
  leftover = @cigar_string[position..-1]
  unless leftover.empty?
    raise "Incorrect parsing of cigar string #{@cigar_string}, at the end left with #{leftover}"
  end
end
percent_identity(reference_sequence_string, query_sequence_string)
click to toggle source
# File lib/bio-cigar/cigar.rb, line 9
#
# Computes the identity between a reference and a query sequence along the
# alignment described by this CIGAR string.
#
# reference_sequence_string:: reference bases, indexed from the first
#                             aligned reference position
# query_sequence_string::     query bases, indexed from the start of the read
#
# Returns [percent, num_match, num_mismatch], where percent is
# num_match / (num_match + num_mismatch) * 100 as a Float. When nothing is
# counted (for example a CIGAR made only of clips) the division is 0.0 / 0
# and percent is NaN.
#
# Counting rules:
# * M, = and X are all re-compared base by base, so = and X are not trusted.
# * I and D add their full length to the mismatch count.
# * S, H, N and P only move the cursors (P moves nothing).
# * A position lying beyond the end of either sequence is a mismatch.
#
# Raises RuntimeError for an operation letter not listed above.
def percent_identity(reference_sequence_string, query_sequence_string)
  num_match = 0
  num_mismatch = 0
  ref_index = 0
  query_index = 0

  each_alignment_chunk do |type, count|
    case type
    when 'M', '=', 'X'
      count.times do |offset|
        ref_base = reference_sequence_string[ref_index + offset]
        query_base = query_sequence_string[query_index + offset]
        # The nil guard stops "past the end of both strings" (nil == nil)
        # from being scored as a match.
        if ref_base && ref_base == query_base
          num_match += 1
        else
          num_mismatch += 1
        end
      end
      ref_index += count
      query_index += count
    when 'I'
      # Extra characters in the query sequence
      num_mismatch += count
      query_index += count
    when 'D'
      # Characters missing from the query sequence
      num_mismatch += count
      ref_index += count
    when 'S', 'H'
      # NOTE(review): 'H' advances the query cursor just like 'S'. The SAM
      # spec says hard-clipped bases are absent from SEQ, so this presumably
      # expects the full, unclipped read as the query - confirm with callers.
      query_index += count
    when 'N'
      # Long skip on the reference sequence
      ref_index += count
    when 'P'
      # Padding consumes neither sequence
    else
      raise "Cigar string not parsed correctly. Unrecognised alignment type #{type}"
    end
  end

  percent = num_match.to_f / (num_match + num_mismatch) * 100
  [percent, num_match, num_mismatch]
end