class Object
Public Class Methods
new(tag_string=nil)
click to toggle source
# File lib/bio-sam-mutation/bio/db/tag.rb, line 2 def initialize(tag_string=nil) set(tag_string) if tag_string end
Public Instance Methods
add_tag(tag)
click to toggle source
# File lib/bio-sam-mutation/bio/db/alignment.rb, line 21 def add_tag(tag) dup.add_tag!(tag) end
add_tag!(new_tag)
click to toggle source
# File lib/bio-sam-mutation/bio/db/alignment.rb, line 8 def add_tag!(new_tag) if new_tag.is_a? String new_tag = Bio::DB::Tag.new(new_tag) # new_tag_obj.set(new_tag) # new_tag = new_tag_obj else raise "Tag not recognised - pass a string or Bio::DB::Tag object" unless new_tag.is_a? Bio::DB::Tag end @tags[new_tag.tag] = new_tag regenerate_string end
mutations(offset=1, length=nil, translation_start=1)
click to toggle source
Call mutations Want to be able to give a length and offset - use this to generate appropriate sub CIGARs, subMDs & call
# File lib/bio-sam-mutation/bio/db/alignment.rb, line 71 def mutations offset=1, length=nil, translation_start=1 return nil if @query_unmapped cigar = Bio::Alignment::CIGAR.new(@cigar,seq,source="sam") length ||= cigar.reference_length - offset return nil if offset+length > cigar.reference_length seq = Bio::Sequence::NA.new(@seq) @cigar_obj = cigar # Generate subalignments from the CIGAR and MD:Z subcigar = cigar.subalignment(offset,length) mdz = Bio::DB::Tag::MD.new(@tags["MD"].value) mdz = mdz.slice(offset,length) # Get inserted bases from the read sequence, only within the region of interest insertions = [] insertion_positions = subcigar.positions(/I/) unless insertion_positions.empty? insertion_positions["I"].each do |ins| # Sam.seq returns a Sequence::NA object # Need a -1 as ruby counts characters # Use ins[2] to retrieve the base as this is the position on query. ins[1] is position on reference, used to annotate position. i = seq.seq[(offset+ins[2]-1),ins[1]] insertions << i end end first_match = true total = 0 mutations = [] reference_pos = @pos - 1 subcigar.pairs.each do |pair| case pair[0] when "M" #break if first_match == false reference_pos += pair[1] total += pair[1] first_match = false # Call deletions using the MD:Z tag - avoid need to supply reference seq. when "D" # Deletions are called below but still need to count here reference_pos += pair[1] when "I" mut = Bio::Mutation.new mut.type = :insertion mut.reference = nil mut.position = reference_pos + offset - translation_start bases = insertions.shift mut.mutant = bases ? bases.upcase : "N" mut.seqname = @rname.to_s mutations << mut end end # Now substitutions & deletions - these need the MD tag sub_pos = mdz.report(/[sd]/) previous_sub_position = 0 unless sub_pos.empty? sub_pos.each do |p| # Reference base is in the MD:Z tag (p[1] here), for the actual base need to go to the read # p[3] is the length of operations preceding the substitution on the read, p[2] on the reference. # p[2] and p[3] are defined on the subalignment, so should add them onto the preceding. # Need to add in any inserted bases from the CIGAR string using query_length preceding = cigar.subalignment(0,offset-1) # Masked length is not included in the MD:Z string so need to add it read_position = preceding.query_length+preceding.masked_length+p[3] # This is the adjustment needed to get the correct annotation: substart = @pos + offset - translation_start - 1 case p[0] when "s" mut = Bio::Mutation.new mut.type = :substitution mut.position = substart+p[2] + 1 mut.reference = p[1].upcase mut.mutant = seq[read_position,p[1].length].upcase mut.seqname = @rname.to_s mutations << mut when "d" mut = Bio::Mutation.new mut.type = :deletion mut.reference = p[1].upcase mut.position = substart+p[2] + 1 mut.mutant = nil mut.seqname = @rname.to_s mutations << mut end end end # mutations.length > 0 ? mutations.sort{|x,y| x.position.to_i <=> y.position.to_i} : nil mutations.length > 0 ? Bio::MutationArray.new(mutations.sort) : nil end
query(offset=1, length=@seq.length, reference_pos=@pos-1, ins_chr="_")
click to toggle source
Output a representation of the query sequence
# File lib/bio-sam-mutation/bio/db/alignment.rb, line 26 def query offset=1, length=@seq.length, reference_pos=@pos-1, ins_chr="_" mutations = self.mutations(offset,length) cigar = Bio::Alignment::CIGAR.new(@cigar,seq,source="sam") preceding = cigar.subalignment(0,offset-1) preceding_diff = preceding.query_length-(offset-1) pointer = preceding.query_length output = [] deletions = 0 insertions = 0 if mutations mutations.each do |mut| mut.position = mut.position + insertions - deletions + preceding_diff case mut.type when :deletion # position for deletion is the first deleted base fillin = mut.position-1-reference_pos-1 output << @seq[pointer..fillin] if fillin > pointer mut.reference.length.times{ output << "-" } pointer += fillin - pointer + 1 deletions += mut.reference.length when :insertion # position for insertion is the base we want fillin = mut.position-reference_pos-1 output << @seq[pointer..fillin] if fillin > pointer output << ins_chr + mut.mutant.downcase + ins_chr pointer += fillin - pointer + 1 + mut.mutant.length insertions += mut.mutant.length when :substitution # position for substitution is the first subbed base fillin = mut.position-1-reference_pos-1 output << @seq[pointer..fillin] if fillin > pointer output << mut.mutant.downcase pointer += fillin - pointer + 1 + mut.mutant.length end end end # Remaining sequence if offset + length > pointer output << @seq[pointer..offset-1+length-1-deletions+insertions+preceding_diff] end output.join end
regenerate_string()
click to toggle source
# File lib/bio-sam-mutation/bio/db/alignment.rb, line 161 def regenerate_string tags_string = @tags.map{|k,v| [v.tag, v.type, v.value].join(":") } self.sam_string = [@qname, @flag, @rname, @pos, @mapq, @cigar, @mrnm, @mpos, @isize, @seq, @qual, tags_string].join("\t") end