module ChimericSeqs

Constants

BEG
HIT
STOP

Public Instance Methods

cluster_query_hits(query) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 211
def cluster_query_hits(query)
        hits = []
        acc_hit = []
        query.hits.each do |hit|
                ind = acc_hit.index(hit.acc)
                if ind.nil?
                        acc_hit << hit.acc
                        hits << [hit]
                else
                        hits[ind] << hit
                end
        end
        return hits           
end
confirm_chimeras(homology_zones, db_path, ident_thresold) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 63
def confirm_chimeras(homology_zones, db_path, ident_thresold)
        acc_hit = homology_zones.map{|zone| zone[HIT].first.acc}
        seq_fasta = %x[blastdbcmd -db #{db_path} -entry #{acc_hit.join(',')}]
        seq_fasta << ">remove\nALGO\n" #Needed for clustal-omega display the dist-matrix, requires unless 3 sequences to do it

        clustal_matrix = do_clustal(seq_fasta)
        clustal_matrix.shift #Remove header
        clustal_matrix.pop #Remove false sequence
        
        clustal_hits = []
        distances = []
        clustal_matrix.each do |line|
                fields = line.split
                fields.pop #Remove data belong to false sequence
                fields.shift #Remove prot name
                distances << fields.map! {|field| field.to_f}        
        end
        delete_positions = search_ident_prots(homology_zones, distances, ident_thresold)
        delete_zones(delete_positions, homology_zones)
end
define_hit_limits(hits) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 187
def define_hit_limits(hits)
        limits=[]
        hits.each do |hit|
                limits << get_limits(hit) 
        end
        return limits
end
define_homology_zones(query, options, query_fasta) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 150
def define_homology_zones(query, options, query_fasta)
        # Define hit limits
        #---------------------
        hits = cluster_query_hits(query) #Hsp packages
        hits_limits = define_hit_limits(hits)

        # Define homology zones
        #------------------------
        #First homology zone
        zones = [[hits_limits.first[BEG], hits_limits.first[STOP], hits.first]]
        ref_hit_beg = hits_limits.first[BEG]
        ref_hit_end = hits_limits.first[STOP]
        
        #Other homology zone
        hits_limits.each_with_index do |hit, i|
                coincidences = 0
                zones.each do |zone|
                        if  hit_is_in?(zone[BEG], zone[STOP], hit) # Extender zona de homologia si coinciden en zona
                                zone[BEG] = [zone[BEG],hit[BEG]].min
                                zone[STOP] = [zone[STOP],hit[STOP]].max
                                coincidences+=1
                        end
                end
                if coincidences == 0
                        zones << [hit[BEG], hit[STOP], hits[i]]
                end
        end
        zones.sort!{|e1,e2| e1[BEG] <=> e2[BEG]}

        # Delete overlapping homology zones
        #------------------------------------
        overlapping_zones = overlapping_zones(zones)
        delete_zones(overlapping_zones, zones)

        return zones
end
delete_zones(overlapping_zones, zones) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 226
def delete_zones(overlapping_zones, zones)
        overlapping_zones.each do |zone|
                zones.delete_at(zone)
        end
end
do_clustal(seq_fasta) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 323
def do_clustal(seq_fasta)
        cmd='clustalo -i -  -o /dev/null --percent-id --full --distmat-out=/dev/stdout --force'
        clustal_matrix = nil
        IO.popen(cmd,'w+') {|clustal|
                clustal.sync = TRUE
                clustal.write(seq_fasta)
                clustal.close_write
                clustal_matrix = clustal.readlines
                clustal.close_read
        }
        return clustal_matrix
end
duplicate_hits(query) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 279
def duplicate_hits(query)
        dup_hits=[]
        query.hits.each do |hit|
                dup_hits << hit.dup
        end
        return dup_hits
end
fragment_chimera(query, seq, hit, hit_position, hit_limits, num_homology_zones, options, db_name, cut_positions) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 102
def fragment_chimera(query, seq, hit, hit_position, hit_limits, num_homology_zones, options, db_name, cut_positions)
        # Prepare new seq and query
        #----------------------------
        query_bak = query.dup
        query_bak.hits = hit # Here, hit is an array of hsps
        query_bak.query_def += "_split_#{hit_position}"
        seq_bak = seq.dup
        seq_bak.reset_classification
        seq_bak.clean_warnings
        seq_bak.seq_name += "_split_#{hit_position}"
        seq_bak.clean_orfs
        seq_bak.save_fasta = TRUE
        seq_bak.ignore = FALSE

        # Cut sequence and move hit/hsps limits
        #----------------------------------------
        if hit_position == 0 #First zone
                limit = 0
                if hit.first.q_frame < 0 #Hit reversed
                        hit.first.q_frame = -1
                end
        else #Middle & last zone
                limit = cut_positions[BEG]#hit_limits[BEG]
                hit_move_limits(hit, -limit, 0) #Redefine hit limits on new sequence after cut
                if hit.first.q_frame >= 0
                        hit.first.q_frame=1
                elsif hit_position < num_homology_zones-1 #Last zone keeps his original frame because it's composed by the hit and the terminal sequence (Here hit is reversed).
                        hit.first.q_frame=-1
                end
        end
        if hit_position == num_homology_zones-1 #Last zone
                seq_bak.seq_fasta = seq.seq_fasta[cut_positions[BEG]..seq.fasta_length-1]#[hit_limits[BEG]..seq.fasta_length-1]
        else  # Beginning & Middle zone
                seq_bak.seq_fasta = seq.seq_fasta[limit..cut_positions[STOP]]#[limit..hit_limits[STOP]]
        end
        seq_length = seq_bak.seq_fasta.length
        query_bak.full_query_length = seq_length
        seq_bak.fasta_length = seq_length
        hit_set_q_len(hit, seq_length)


        # Full length analisys of fragment
        #----------------------------------------
        analiza_orf_y_fl(seq_bak, query_bak.hits, options, db_name)

        return seq_bak
end
get_hits(query, ref_hit) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 255
def get_hits(query, ref_hit)
        all_hits=[]
        query.hits.each do |hit|
                if hit.acc == ref_hit.acc
                        all_hits << hit
                end
        end
        return all_hits
end
get_limits(hit) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 195
def get_limits(hit)
        coordenates=[]
        hit.map{|h| coordenates << h.q_beg; coordenates << h.q_end}   
        #             BEG                                END
        limits=[coordenates.min, coordenates.max]
        return limits
end
get_limits_s(hit) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 203
def get_limits_s(hit)
        coordenates=[]
        hit.map{|h| coordenates << h.s_beg; coordenates << h.s_end}   
        #             BEG                                END
        limits=[coordenates.min, coordenates.max]
        return limits
end
hit_is_in?(h_beg, h_end, hit) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 246
def hit_is_in?(h_beg, h_end, hit)
        is=FALSE
                        # CONTIENE                                  #OVERLAP
        if h_beg <= hit[BEG] && h_end > hit[BEG] || hit[BEG] <= h_beg && hit[STOP] > h_beg
                is=TRUE
        end
        return is
end
hit_move_limits(hit, q_add, s_add) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 306
def hit_move_limits(hit, q_add, s_add)
        if hit.class.to_s == 'Array'
                hit.each do |hsp|
                        move_limits(hsp, q_add, s_add)
                end
        elsif hit.class.to_s == 'Hit'
                #puts "\e[35m#{hit.acc}\t#{hit.q_beg}\t#{hit.q_end}\t#{hit.s_beg}\t#{hit.s_end}\t#{hit.reversed}\e[0m"
                move_limits(hit, q_add, s_add)
        end
end
hit_set_q_len(hit, q_len) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 317
def hit_set_q_len(hit, q_len)
        hit.each do |hsp|
                hsp.q_len=q_len
        end
end
min_distance_between_homology_zones(homology_zones) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 266
def min_distance_between_homology_zones(homology_zones)
        distance=nil
        homology_zones.each_with_index do |zone,i|
                if i > 0
                        local_distance=homology_zones[i][BEG] - homology_zones[i-1][STOP]
                        if distance.nil? || distance > local_distance
                                distance=local_distance
                        end 
                end
        end
        return distance
end
move_limits(hit, q_add, s_add) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 294
def move_limits(hit, q_add, s_add)
        hit.q_beg+=q_add
        hit.q_end+=q_add
        hit.s_beg+=s_add
        hit.s_end+=s_add
        if hit.class.to_s == 'ExoBlastHit' && !hit.q_frameshift.empty? #There is frameshift
                hit.q_frameshift.map!{|fs| 
                        [fs.first + q_add, fs.last]
                }
        end
end
overlapping_zones(zones) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 232
def overlapping_zones(zones)
        delete_zones=[]
        zones.each_with_index do |zone, i|
                if i>0
                        if zone[BEG]< zones[i-1][STOP]
                                delete_zones << i
                                delete_zones << i-1        
                        end
                end
        end
        delete_zones.uniq!
        return delete_zones
end
search_chimeras(seq, blast_query, options, db_name, db_path) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 9
def search_chimeras(seq, blast_query, options, db_name, db_path)
        
        # DETECTION
        #----------------------
        homology_zones = []
        cut_positions = []
        if blast_query.hits.length > 1 
                homology_zones = define_homology_zones(blast_query, options, seq.seq_fasta)          
                cut_positions = set_cut_positions(homology_zones) if homology_zones.length > 1
        end
        # CONFIRMATION
        #----------------------
        num_homology_zones = homology_zones.length
        if num_homology_zones > 1 && options[:chimera].include?('r')
                confirm_chimeras(homology_zones, db_path, options[:ident_thresold]) # Check if prots are differents or not
                num_homology_zones = homology_zones.length
        end

        # SPLICING
        #--------------------
        new_seqs=[]           
        if num_homology_zones > 1 #In this case the sequence is a chimera
                seq.format_chimera!
                homology_zones.each_with_index do |hom_zone, i|
                        seq.hit << hom_zone[HIT].first.dup #Save hit before modified it for write output purposes
                        hit_limits = get_limits(hom_zone[HIT])# Take beginning and end of hit on query, hit can be composed by unsorted or antisense hsps
                        if options[:chimera].include?('c') && hit_limits[STOP]-hit_limits[BEG]> options[:min_nucleotides] 
                                new_seqs << fragment_chimera(blast_query, seq, hom_zone[HIT], i, hit_limits, num_homology_zones, options, db_name, cut_positions[i])
                                seq.warnings('SOLVED')
                        end
                end
        else
                new_seqs = nil #Sequence isn't chimera
        end
        return new_seqs
end
search_ident_prots(homology_zones, distances, ident_thresold) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 85
def search_ident_prots(homology_zones, distances, ident_thresold)
        delete_positions = []         
        n_homology_zones = homology_zones.length
        n_homology_zones.times do |j|
                n_homology_zones.times do |i|                        
                        next if i == j
                        if distances[j][i] >= ident_thresold
                                delete_positions << j
                                delete_positions << i
                        end
                end
        end
        delete_positions.uniq!
        return delete_positions
end
set_cut_positions(homology_zones) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 46
def set_cut_positions(homology_zones)
        cut_positions = []
        last_cut = -1
        homology_zones.each_with_index do |hom_zone, i|
                if i > 0
                        positions = []
                        positions << last_cut + 1 # Start of fragment
                        cut_position = homology_zones[i-1][STOP] + (hom_zone[BEG] - homology_zones[i-1][STOP])/2
                        positions << cut_position # End of fragment
                        last_cut = cut_position
                        cut_positions << positions
                end
        end
        cut_positions << [last_cut, homology_zones.last[HIT].first.q_len-1]
        return cut_positions          
end
set_limits(hit, q_beg, q_end, s_beg, s_end) click to toggle source
# File lib/full_lengther_next/chimeric_seqs.rb, line 287
def set_limits(hit, q_beg, q_end, s_beg, s_end)
        hit.q_beg = q_beg
        hit.q_end = q_end
        hit.s_beg = s_beg
        hit.s_end = s_end
end