class ExonerateResult

Parses the “vulgar:” lines of exonerate output files and uses them to create instances of “BlastQuery” and “ExoBlastHit”

Attributes

querys[RW]

Public Class Methods

new(input, seqs= nil, query_seqs = nil, all = TRUE) click to toggle source

Parser initialization

# File lib/full_lengther_next/exonerate_result.rb, line 42
# Parser initialization: reads one or several exonerate output files and
# parses each of them with parse_file.
#
# input      - path of an exonerate output file, or an Array of such paths.
# seqs       - stored in @seqs (unigenes); optional.
# query_seqs - stored in @prot_seqs (proteins); optional.
# all        - forwarded unchanged to parse_file, where a false value keeps
#              only the highest-scoring record of each target.
def initialize(input, seqs= nil, query_seqs = nil, all = true)
  @querys = []
  @seqs = seqs # unigenes
  @prot_seqs = query_seqs # proteins

  # Treat a single path as a one-element list so both cases share one loop.
  files = input.is_a?(Array) ? input : [input]
  files.each do |file|
    # File.readlines opens, reads and closes the file in one call, so no
    # file handle is left open.
    parse_file(File.readlines(file), all)
  end
end

Public Instance Methods

convert_parsed_lines(lines_parsed) click to toggle source
# File lib/full_lengther_next/exonerate_result.rb, line 81
# Turns parsed vulgar records into BlastQuery objects carrying their hits.
#
# lines_parsed - an Array of feature hashes, or a Hash whose values are
#                feature hashes (the shapes parse_file produces).
#
# A new BlastQuery is created and appended to @querys each time the
# 'target_id' differs from that of the previous record, so non-adjacent
# records of the same target yield separate queries. Every record is handed
# to hiting together with its parsed alignment tags.
#
# A record that raises while being converted is reported on stdout and
# skipped; the remaining records are still processed.
def convert_parsed_lines(lines_parsed)
  last_query = nil
  query = nil
  from_hash = lines_parsed.is_a?(Hash)
  lines_parsed.each do |line|
    # Hash#each yields [key, value] pairs; the feature hash is the value.
    features = from_hash ? line[1] : line
    begin
      tags = features['align_data'].scan(/([MFG53S]) ([0-9]+) ([0-9]+)/)
      tags.map! { |tag| [tag[0], tag[1].to_i, tag[2].to_i] }
      if features['target_id'] != last_query
        last_query = features['target_id']
        query = BlastQuery.new(features['target_id'])
        @querys << query
      end
      hiting(features, tags, query)
    rescue StandardError
      # The record itself may be unusable (e.g. nil), so do not index into it
      # blindly here: an error raised inside the handler would abort the run.
      target_id, query_id = features.is_a?(Hash) ? features.values_at('target_id', 'query_id') : [nil, nil]
      puts "Result: #{target_id} => #{query_id} hasn't been parsed\n#{line}"
    end
  end
end
define_hit_parameters(hit, features, tags) click to toggle source
# File lib/full_lengther_next/exonerate_result.rb, line 203
# Fills in the BLAST-style attributes of +hit+ from one parsed vulgar record.
#
# hit      - object exposing the attribute writers used below.
# features - feature hash of the record; the keys read here are
#            'query_start_align', 'query_end_align', 'score',
#            'target_strand', 'target_start_align' and 'query_id'.
# tags     - Array of alignment tags whose first element is the operation
#            letter; only 'G' tags are counted here.
#
# Attributes that are not derived from the record (mismatches, e_val,
# bit_score, full_subject_length, definition, sequences) are set to
# zero/empty placeholders.
def define_hit_parameters(hit, features, tags)
  hit.gaps = tags.count { |aln| aln[0] == 'G' }
  hit.reversed = false
  hit.align_len = (features['query_end_align'] - features['query_start_align']).abs + 1
  hit.mismatches = 0
  hit.e_val = 0
  hit.bit_score = 0
  hit.score = features['score']
  hit.s_frame = nil
  # Frame is 1..3 from the target start modulo 3, negative on the '-' strand.
  strand = features['target_strand'] == '-' ? -1 : 1
  hit.q_frame = ((features['target_start_align'] % 3) + 1) * strand
  hit.subject_id = features['query_id']
  hit.full_subject_length = 0
  hit.definition = ''
  hit.acc = features['query_id']
  hit.q_seq = ''
  hit.s_seq = ''
end
do_reverso_complementary(counter_target, target_seq) click to toggle source
# File lib/full_lengther_next/exonerate_result.rb, line 186
# Mirrors a 0-based position onto the opposite end of +target_seq+ and swaps
# the sequence for whatever its +complementary_dna+ method returns.
#
# counter_target - position within target_seq.
# target_seq     - object responding to +length+ and +complementary_dna+
#                  (presumably a DNA String extended elsewhere in the
#                  project; verify against the caller).
#
# Returns a two-element Array: [mirrored position, converted sequence].
def do_reverso_complementary(counter_target, target_seq)
  mirrored_position = target_seq.length - 1 - counter_target
  [mirrored_position, target_seq.complementary_dna]
end
empty?() click to toggle source

Returns true when no queries have been stored

# File lib/full_lengther_next/exonerate_result.rb, line 246
# Tells whether the @querys collection is empty.
#
# Returns the result of @querys.empty?.
def empty?
  @querys.empty?
end
find_query(querys,name_q) click to toggle source

find query by name

# File lib/full_lengther_next/exonerate_result.rb, line 234
# Looks up a query by name.
#
# querys - collection of objects responding to +query_id+.
# name_q - value compared against each element's +query_id+.
#
# Returns the first element whose query_id equals name_q, or nil when the
# collection is empty or nothing matches.
def find_query(querys,name_q)
  return nil if querys.empty?

  querys.find { |candidate| candidate.query_id == name_q }
end
hiting(features, tags, query) click to toggle source

This method only works correctly with output produced by exonerate's “--model protein2dna” option

# File lib/full_lengther_next/exonerate_result.rb, line 108
# Converts exonerate's relative coordinates into BLAST-style absolute ones,
# defining only the hits. Only works correctly with output produced by
# exonerate's "--model protein2dna" option.
#
# features - feature hash of one vulgar record (see parse_file).
# tags     - Array of alignment tags, indexed with the OPERATION, QUERY and
#            TARGET constants.
# query    - object receiving the new hit through +add_hit+.
#
# An ExoBlastHit is created with 1-based coordinates, completed by
# define_hit_parameters and added to +query+. The tags are then walked to
# collect frameshift positions (stored in hit.s_frameshift and
# hit.q_frameshift). When both @prot_seqs and @seqs are present the aligned
# sequences are rebuilt as well and stored in the hit together with the
# alignment length and identity.
#
# NOTE(review): a frameshift on a '-' strand target still needs the target
# sequence length, so it raises when no sequences were supplied — confirm
# whether that combination has to be supported.
def hiting(features, tags, query)
  # Alignment strings can only be rebuilt when both sequence sets are known.
  do_align = !@prot_seqs.nil? && !@seqs.nil?
  reverse_strand = features['target_strand'] == '-'

  start_target = features['target_start_align'] # unigene
  start_query = features['query_start_align'] # protein
  ends_target = features['target_end_align']
  ends_query = features['query_end_align'] - 1 # exonerate does not report the end coordinates 0-based
  if reverse_strand
    # The start of the target is its end when the match lies on the
    # reverse-complementary strand.
    start_target -= 1
  else
    ends_target -= 1
  end
  hit = ExoBlastHit.new(start_target + 1, ends_target + 1, start_query + 1, ends_query + 1)
  define_hit_parameters(hit, features, tags)
  query.add_hit(hit)

  # Define alignment and BLAST-like parameters
  target_alignment = ''
  query_alignment = ''
  counter_target = start_target
  counter_query = start_query
  query_seq = nil
  target_seq = nil
  if do_align # get seqs
    query_seq = @prot_seqs[features['query_id']]
    target_seq = @seqs[features['target_id']]
    # Mirroring needs the target sequence, so it is only attempted when
    # sequences are available.
    if reverse_strand
      counter_target, target_seq = do_reverso_complementary(counter_target, target_seq)
    end
  end
  query_frameshift = []
  target_frameshift = []
  gap_shift = 0
  tags.each_with_index do |tag, n_operation|
    if do_align
      gap_shift = 0 if tag[OPERATION] != 'G'
      query_alignment << query_seq[counter_query, tag[QUERY]]
      target_alignment << target_seq[counter_target, tag[TARGET]].translate
    end
    if tag[OPERATION] == 'F'
      if tag[TARGET] > 0 && tag[TARGET] < 3 # true frameshift
        gap_shift += 1
        # Some frameshifts are not followed by a gap operation; insert the
        # gap ourselves. A frameshift that is the last operation has no
        # following tag at all and is handled the same way.
        next_tag = tags[n_operation + 1]
        if next_tag.nil? || next_tag[OPERATION] != 'G'
          query_alignment << '-' if do_align
        end
      else
        query_alignment << '-' * (tag[TARGET] / 3.0).ceil if do_align
      end
      query_frameshift << counter_query
      fs_counter_target = counter_target
      # This is a workaround: reverse-strand parsing should be modelled as
      # decrementing the exonerate counter rather than incrementing it.
      fs_counter_target = target_seq.length - counter_target if reverse_strand
      if tag[TARGET] > 3
        real_fs = tag[TARGET] % 3
        real_gap = tag[TARGET] - real_fs
        fs = [fs_counter_target + real_gap, real_fs]
      else
        fs = [fs_counter_target, tag[TARGET]]
      end
      target_frameshift << fs
    elsif tag[OPERATION] == 'G'
      query_alignment << '-' * (tag[TARGET] / 3.0).ceil if do_align
      diff = tag[QUERY] - gap_shift
      target_alignment << '-' * diff if do_align && diff > 0
      gap_shift = 0
    end
    counter_query += tag[QUERY]
    counter_target += tag[TARGET]
  end
  hit.s_frameshift = query_frameshift
  hit.q_frameshift = target_frameshift

  if do_align
    hit.q_seq = target_alignment
    hit.s_seq = query_alignment
    hit.align_len = query_alignment.length
    hit.ident = set_ident(target_alignment, query_alignment)
  end
end
inspect() click to toggle source

inspect results

# File lib/full_lengther_next/exonerate_result.rb, line 225
def inspect
  res = "Exonerate results:\n"
  res+= '-'*20
  res+= "\nQuerys: #{@querys.count}\n"
  @querys.each{|q| res+=q.inspect+"\n"}
  return res
end
parse_file(lines, all) click to toggle source
# File lib/full_lengther_next/exonerate_result.rb, line 57
# Extracts the "vulgar:" records from the lines of an exonerate output file
# and passes them on to convert_parsed_lines.
#
# lines - Array of Strings, one per line of the file. The strings are not
#         modified.
# all   - when truthy every record is kept, in file order, in an Array;
#         otherwise a Hash keyed by target_id keeps only the record with the
#         highest score for each target (the first one wins on ties).
#
# Returns whatever convert_parsed_lines returns.
def parse_file(lines, all)
  lines_parsed = all ? [] : {}
  lines.each do |line|
    next unless line =~ /^vulgar:/

    # Non-destructive chomp: the caller's strings may be frozen or reused.
    # The limit of 11 keeps the whole alignment description in the last field.
    fields = line.chomp.split(' ', 11)
    features = {
      'query_id' => fields[1],
      'query_start_align' => fields[2].to_i,
      'query_end_align' => fields[3].to_i,
      'query_strand' => fields[4],
      'target_id' => fields[5],
      'target_start_align' => fields[6].to_i,
      'target_end_align' => fields[7].to_i,
      'target_strand' => fields[8],
      'score' => fields[9].to_i,
      'align_data' => fields[10]
    }
    if all
      lines_parsed << features
    else
      # Add the record if the target is new; otherwise replace the stored
      # record only when the new one has a higher score.
      best = lines_parsed[features['target_id']]
      lines_parsed[features['target_id']] = features if best.nil? || features['score'] > best['score']
    end
  end
  convert_parsed_lines(lines_parsed)
end
set_ident(target_alignment, query_alignment) click to toggle source
# File lib/full_lengther_next/exonerate_result.rb, line 192
# Computes the percentage of positions of +target_alignment+ whose character
# equals the character at the same index of +query_alignment+.
#
# target_alignment - String; its length is the denominator.
# query_alignment  - String compared position by position.
#
# Returns a Float obtained by formatting the percentage with two decimals
# and converting it back with +to_f+.
def set_ident(target_alignment, query_alignment)
  identical = target_alignment.each_char.with_index.count do |residue, idx|
    residue == query_alignment[idx]
  end
  format('%.2f', identical * 100.0 / target_alignment.length).to_f
end
size() click to toggle source

get query count

# File lib/full_lengther_next/exonerate_result.rb, line 252
# Number of queries currently held in @querys.
#
# Returns the result of @querys.size.
def size
  @querys.size
end