class Bio::DB::Tag::MD

Attributes

cumulative[RW]
pairs[RW]
tag[RW]

Public Class Methods

new(data) click to toggle source
# File lib/bio-sam-mutation/bio/db/tag/md.rb, line 9
# Build an MD tag object from a raw string or from a Bio::DB::Tag.
#
# data - a String or a Bio::DB::Tag.
#        * String: tried against the class-level @@regexp first; on a match the
#          first capture group becomes the tag. Otherwise the whole string is
#          used if it matches @@format (tag given without the MD:Z: leader).
#        * Bio::DB::Tag: its +value+ is used as the tag; a warning is emitted
#          when its +tag+ name is not "MD".
#
# Populates:
#   @tag        - the bare tag string.
#   @pairs      - array of [operation, value] pairs, where operation is
#                 "m" (match, Integer value), "s" (substitution, String) or
#                 "d" (deletion, String).
#   @cumulative - the same pairs, each extended with the reference offset and
#                 the read offset at which the operation starts.
#
# Raises RuntimeError ("Tag not of expected format.") when data is a String
# matching neither pattern, or is of any other unsupported type.
def initialize(data)
        if data.is_a? String
                if (leader_match = data.match(@@regexp))
                        @tag = leader_match[1]
                elsif data.match(@@format)
                        # Assume tag given without MD:Z: leader
                        @tag = data
                else
                        raise "Tag not of expected format."
                end
        elsif data.is_a? Bio::DB::Tag
                @tag = data.value
                # Only complain when the wrapped tag is something other than MD.
                warn "Not an MD tag" unless data.tag == "MD"
        else
                raise "Tag not of expected format."
        end

        # Split the string into operations using the @@splitter regexp class variable.
        # scan returns plain arrays of captures ([match, substitution, deletion]) rather
        # than MatchData objects, so any capture names are not available here.
        @pairs = @tag.scan(@@splitter).map do |captures|
                candidates = [["m", captures[0]], ["s", captures[1]], ["d", captures[2]]]
                # Only one of the captures is expected to be non-nil; take the first present.
                operation = candidates.find { |candidate| candidate[1] }
                # Match lengths are stored as integers, bases as strings.
                operation[1] = operation[1].to_i if operation && operation[0] == "m"
                operation
        end

        @cumulative = []
        cumulative_length = 0
        read_length = 0
        @pairs.each do |pair|
                case pair[0]
                        when "m"
                                len = pair[1]
                                rlen = pair[1]
                        when "s"
                                len = pair[1].length
                                rlen = pair[1].length
                        when "d"
                                len = pair[1].length
                                # Deleted bases don't appear in the read, so don't count to the length
                                rlen = 0
                end
                # Third element is the total preceding length on the reference, i.e. the
                # position of the operation; fourth element is the same for the read.
                @cumulative << (pair + [cumulative_length, read_length])
                cumulative_length += len
                read_length += rlen
        end
end

Public Instance Methods

deletions() click to toggle source
# File lib/bio-sam-mutation/bio/db/tag/md.rb, line 64
# Report only the deletion events ("d" operations).
#
# Returns whatever #report returns for the pattern /d/.
def deletions
        deletion_ops = /d/
        report(deletion_ops)
end
reconstruct_tag(array=@pairs) click to toggle source

Reconstruct an MD:Z tag string from an array of operation pairs (by default, this object's pairs array)

# File lib/bio-sam-mutation/bio/db/tag/md.rb, line 84
# Rebuild an MD:Z tag string from a list of operation pairs.
#
# array - list of [operation, value] pairs (defaults to @pairs):
#         "m" pairs contribute their value converted to a string,
#         "s" pairs contribute the value as-is,
#         "d" pairs contribute the value prefixed with a caret.
#         Pairs with any other operation contribute nothing.
#
# Returns the concatenated tag String.
def reconstruct_tag(array=@pairs)
        fragments = array.map do |pair|
                value = pair[1]
                case pair[0]
                        when "m" then value.to_s
                        when "s" then value
                        when "d" then "^"+value
                end
        end
        fragments.join("")
end
ref_length() click to toggle source

Sums the total length of reference sequence covered by this MD:Z tag (which may itself represent only part of an alignment)

# File lib/bio-sam-mutation/bio/db/tag/md.rb, line 103
# Total length of reference sequence described by @tag.
#
# Every run of digits contributes its integer value (matched bases) and every
# base letter contributes one, whether it is a substitution or part of a
# deletion. Caret characters only mark deletions and are not counted.
#
# Any letter is treated as a reference base, so ambiguity codes other than
# G/A/T/C/N are counted too, and an empty tag yields 0 instead of raising.
#
# Returns an Integer.
def ref_length
        @tag.scan(/\d+|[A-Za-z]/).reduce(0) do |total, token|
                # Digit runs are "movement" lengths; single letters are one base each.
                total + (token =~ /\A\d/ ? token.to_i : 1)
        end
end
report(regexp=/[sd]/) click to toggle source

Report the positions of given events

# File lib/bio-sam-mutation/bio/db/tag/md.rb, line 73
# Report the positions of the requested kinds of event.
#
# regexp - pattern matched against each entry's operation code (first
#          element); defaults to substitutions and deletions.
#
# Returns a new Array holding the matching entries of @cumulative, in order.
def report(regexp=/[sd]/)
        @cumulative.select { |entry| entry[0] =~ regexp }
end
slice(offset,length) click to toggle source

Given an offset in reference sequence and length, return an object corresponding to that subregion of the alignment

# File lib/bio-sam-mutation/bio/db/tag/md.rb, line 116
# Build a new MD object covering a sub-region of this alignment.
#
# offset - start of the region in reference coordinates.
# length - length of the region.
#
# The pairs are narrowed by iterate_pairs (defined outside this view, called
# with @@reference as its mode argument — presumably it trims to the requested
# window; confirm against its definition), turned back into a tag string and
# wrapped in a fresh Bio::DB::Tag::MD.
def slice(offset,length)
        Bio::DB::Tag::MD.new(
                reconstruct_tag(iterate_pairs(@pairs,offset,length,@@reference))
        )
end
substitutions() click to toggle source
# File lib/bio-sam-mutation/bio/db/tag/md.rb, line 68
# Report only the substitution events ("s" operations).
#
# Returns whatever #report returns for the pattern /s/.
def substitutions
        substitution_ops = /s/
        report(substitution_ops)
end