class Object

Constants

CHIMERA
CODING
COMPLETE
C_TERMINAL
FAILED
INTERNAL
MISASSEMBLED
NCRNA
N_TERMINAL
OPERATION
OTHER
QUERY
TARGET
UNKNOWN
UNMAPPED

Public Instance Methods

analysis_over_DB_annotated_seqs(seqs_annotation_DB, reptrans_fasta, cluster_file_path, stats_hash, key_stats, pfam_clustering) click to toggle source

END MAIN FUNCTION

# File lib/full_lengther_next/reptrans.rb, line 67
# Clusters annotated sequences, keeps one representative per cluster, reports
# the clusters and writes the representatives to a FASTA file.
#
# seqs_annotation_DB - sequences handed to clustering_by_id.
# reptrans_fasta     - output FASTA path; it is opened with mode 'w', so any
#                      previous content of that file is replaced.
# cluster_file_path  - path handed to report_clustering.
# stats_hash         - counters; stats_hash[key_stats] is increased by the
#                      number of representatives.
# key_stats          - key of stats_hash to increase.
# pfam_clustering    - when truthy, the representatives are clustered a second
#                      time by their :pfam_id annotation. Optional (defaults to
#                      false) so five-argument calls are valid.
#
# Returns the value of the final write_fasta call.
def analysis_over_DB_annotated_seqs(seqs_annotation_DB, reptrans_fasta, cluster_file_path, stats_hash, key_stats, pfam_clustering = false)
  clusters = clustering_by_id(seqs_annotation_DB)
  representatives = select_representative(clusters)
  if pfam_clustering
    # TODO (kept from the original): take the annotation type from
    # my_worker_manager_fln (@@func_annot_type) instead of hard-coding :pfam_id.
    # Note that from here on `clusters` holds clusters of representatives only,
    # so the report lists representatives, not the original id-cluster members.
    clusters = clustering_by_annot(representatives, :pfam_id)
    representatives = select_representative(clusters)
  end
  stats_hash[key_stats] += representatives.length
  report_clustering(cluster_file_path, clusters, representatives)
  write_fasta(representatives, reptrans_fasta, 'w')
end
artifact?(seq, query, db_name, db_path, options, new_seqs) click to toggle source

MAIN FUNCTION

# File lib/full_lengther_next/artifacts.rb, line 9
# Decides whether +seq+ is an artifact (unmapped, misassembled or chimeric)
# and tags it accordingly.
#
# seq      - sequence object; its hit, type, db_name, save_fasta and ignore
#            attributes may be written, and warnings may be added.
# query    - BLAST query result, or nil when there is none.
# db_name  - database name stored on the sequence when it is an artifact.
# db_path  - forwarded to search_chimeras.
# options  - options hash; chimera search is skipped when options[:chimera]
#            includes 'd'.
# new_seqs - array that receives the sequences returned by search_chimeras.
#
# Returns true when the sequence was flagged as an artifact, false otherwise.
# (Uses the true/false literals: the TRUE/FALSE constants no longer exist in
# Ruby 3.2+.)
def artifact?(seq, query, db_name, db_path, options, new_seqs)
  artifact = false

  # UNMAPPED CONTIG DETECTION (only possible when there is no BLAST result)
  if query.nil? && seq.unmapped?
    seq.hit = nil
    artifact = true
    seq.type = UNMAPPED
  end

  unless query.nil?
    # MISASSEMBLED DETECTION: a misassembled sequence stops the chimera analysis
    if !artifact && misassembled_detection(query)
      seq.hit = query.hits.first
      artifact = true
      seq.type = MISASSEMBLED
      seq.warnings('ERROR#1')
    end

    # OVERLAPPING HSPS ON SUBJECT DETECTION (disabled in the original source)
    # if !artifact
    #   hit_reference = query.hits.first.dup
    #   query, overlapping = overlapping_hsps_on_subject(query)
    #   if overlapping
    #     if query.hits.first.nil?
    #       seq.hit = hit_reference
    #     else
    #       seq.hit = query.hits.first
    #     end
    #     artifact = TRUE
    #     seq.type = OTHER
    #     seq.warnings('ERROR#2')
    #   end
    # end

    # MULTIPLE HSP DETECTION: only a warning, the sequence is not discarded
    if !artifact && multiple_hsps(query, 3)
      seq.hit = query.hits.first
      seq.warnings('ERROR#3')
    end

    # CHIMERA DETECTION
    if !artifact && !options[:chimera].include?('d')
      chimera = search_chimeras(seq, query, options, db_name, db_path)
      unless chimera.nil?
        new_seqs.concat(chimera)
        seq.db_name = db_name
        seq.type = CHIMERA
        artifact = true
      end
    end
  end

  if artifact
    puts seq.prot_annot_calification if $verbose > 1
    seq.db_name = db_name
    seq.save_fasta = false
    seq.ignore = true
  end
  return artifact
end
clean_by_identity(blast_result, ident) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 137
# Drops, for every query of +blast_result+, the hits whose identity is not
# strictly greater than +ident+. A query left without hits gets [nil] as its
# hit list; queries whose first hit is already nil are not filtered.
# Every query's full_query_length is converted with to_i (scbi_blast returns
# it as a String instead of an Integer).
def clean_by_identity(blast_result, ident)
  blast_result.querys.each do |query|
    unless query.hits.first.nil?
      kept = query.hits.select { |hit| hit.ident > ident }
      query.hits = kept.empty? ? [nil] : kept # [nil] marks "no hit"
    end
    query.full_query_length = query.full_query_length.to_i
  end
end
clean_by_query_length_match(blast_result, min_len_nt) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 148
# Keeps, for every query of +blast_result+, only the hits whose alignment
# length multiplied by 3 is strictly greater than +min_len_nt+. A query left
# without hits gets [nil] as its hit list; queries whose first hit is already
# nil are not filtered.
# Every query's full_query_length is converted with to_i (scbi_blast returns
# it as a String instead of an Integer).
def clean_by_query_length_match(blast_result, min_len_nt)
  blast_result.querys.each do |query|
    unless query.hits.first.nil?
      long_enough = query.hits.select { |hit| hit.align_len * 3 > min_len_nt }
      query.hits = long_enough.empty? ? [nil] : long_enough # [nil] marks "no hit"
    end
    query.full_query_length = query.full_query_length.to_i
  end
end
clean_hsp_by_identity(hit, identity) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 298
# Removes in place the HSPs of +hit+ whose identity is below +identity+
# and returns the same (mutated) array.
def clean_hsp_by_identity(hit, identity)
  hit.keep_if { |hsp| hsp.ident >= identity }
end
clean_overlapping_hsps(blast_result, keep_if_diff_sense = FALSE) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 161
# Removes, for every query with more than one hit, the HSPs that
# same_query_hsp reports as overlapping an earlier-examined HSP.
#
# blast_result       - object whose querys are processed in place.
# keep_if_diff_sense - when truthy, an overlapping HSP is removed only if
#                      same_sense? says both HSPs have the same sense.
#                      Defaults to false (the FALSE constant used before no
#                      longer exists in Ruby 3.2+).
#
# Removed HSPs are first replaced by nil (so they are skipped for the rest of
# the scan) and the list is compacted at the end.
def clean_overlapping_hsps(blast_result, keep_if_diff_sense = false)
  blast_result.querys.each do |query|
    next unless query.hits.length > 1

    query.hits.each_with_index do |hit, j|
      next if hit.nil? # already discarded by a previous comparison
      query.hits.each_with_index do |second_hit, i|
        next if second_hit.nil? || i == j # discarded, or the very same HSP
        next unless same_query_hsp(hit, second_hit)
        query.hits[i] = nil if !keep_if_diff_sense || same_sense?(hit, second_hit)
      end
    end
    query.hits.compact!
  end
end
clean_subject_overlapping_hsps(complete_hit, cleaned_hits) click to toggle source

COMPLEMENTARY FUNCTIONS

# File lib/full_lengther_next/blast_functions.rb, line 246
# Runs subject_overlapping_hsps on +complete_hit+ when it has more than one
# HSP, appends the (possibly filtered) HSPs to +cleaned_hits+ and returns
# [hsps, overlapping]. For groups of zero or one HSP nothing is checked and
# the overlapping flag is nil.
def clean_subject_overlapping_hsps(complete_hit, cleaned_hits)
  overlapping = nil
  complete_hit, overlapping = subject_overlapping_hsps(complete_hit) if complete_hit.length > 1
  cleaned_hits.concat(complete_hit)
  [complete_hit, overlapping]
end
cluster_hsps(hsps) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 303
# Groups consecutive HSPs that share the same accession (acc).
# Returns an array of groups, each group being an array of HSPs. HSPs with the
# same accession that are not adjacent in +hsps+ end up in separate groups.
def cluster_hsps(hsps)
  previous_acc = ''
  hsps.each_with_object([]) do |hsp, groups|
    if hsp.acc == previous_acc
      groups.last << hsp
    else
      groups << [hsp]
    end
    previous_acc = hsp.acc
  end
end
clustering_by_annot(seqs_with_hit, annotation_type) click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 165
# Groups sequences by the value of one functional annotation.
#
# seqs_with_hit   - sequences responding to functional_annotations (a Hash).
# annotation_type - key looked up in functional_annotations (e.g. :pfam_id).
#
# The annotation string is normalised by sorting its ';'-separated items, so
# the same ids in a different order fall into the same cluster. Sequences
# whose annotation is nil or '-' each form their own one-element cluster.
#
# Returns an array of clusters: annotated clusters first (in order of first
# appearance), then the single-sequence clusters of unannotated sequences.
def clustering_by_annot(seqs_with_hit, annotation_type)
  annotated = {} # normalised annotation => cluster (Hash keeps insertion order)
  unannotated = []
  seqs_with_hit.each do |seq|
    annot = seq.functional_annotations[annotation_type]
    annot = annot.split(';').sort.join(';') unless annot.nil?
    if annot.nil? || annot == '-'
      unannotated << [seq]
    else
      (annotated[annot] ||= []) << seq
    end
  end
  annotated.values.concat(unannotated)
end
clustering_by_id(seqs_with_hit) click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 150
# Groups sequences that share the same hit accession (seq.get_acc).
# Returns an array of clusters ordered by the first appearance of each
# accession; sequences keep their input order inside a cluster.
def clustering_by_id(seqs_with_hit)
  # group_by uses a Hash, avoiding a linear Array#index scan per sequence.
  seqs_with_hit.group_by { |seq| seq.get_acc }.values
end
count_cpu(options) click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 206
# Returns the number of CPUs described by options[:workers]: when it is an
# Array, its length plus one; otherwise the value itself.
def count_cpu(options)
  workers = options[:workers]
  workers.is_a?(Array) ? workers.length + 1 : workers
end
do_blast_with_EST(putative_seqs, options, reptrans_fasta, blast_path, cluster_EST_annotated_path, stats_hash) click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 101
# Second server, used to build the representative transcriptome: runs the EST
# worker manager over +putative_seqs+ and clusters the sequences that got an
# EST annotation.
#
# putative_seqs              - sequences handed to MyWorkerManagerEst.
# options                    - options hash; :server_ip, :port, :workers and
#                              :chunk_size are read, :chimera is set to nil.
# reptrans_fasta             - FASTA path forwarded to
#                              analysis_over_DB_annotated_seqs.
# blast_path                 - forwarded to MyWorkerManagerEst.init_work_manager.
# cluster_EST_annotated_path - cluster report path for EST-annotated sequences.
# stats_hash                 - counters; 'est_annotated' is updated.
#
# Returns the second array given back by MyWorkerManagerEst.get_array_seqs.
def do_blast_with_EST(putative_seqs, options, reptrans_fasta, blast_path, cluster_EST_annotated_path, stats_hash) # Second server to representative transcriptome
  $LOG.info 'Starting server for EST analysis'
  custom_worker_file = File.join(File.dirname(ROOT_PATH), 'lib', 'full_lengther_next', 'classes', 'my_worker_EST.rb')
  options[:chimera] = nil # Chimera system is inactive on RepTrans; this trims the BLAST output

  MyWorkerManagerEst.init_work_manager(putative_seqs, options, blast_path)
  server_EST = ScbiMapreduce::Manager.new(options[:server_ip], options[:port], options[:workers], MyWorkerManagerEst, custom_worker_file, STDOUT, FULL_LENGTHER_NEXT_INIT)
  server_EST.chunk_size = options[:chunk_size]
  server_EST.start_server
  $LOG.info 'Closing server for EST analysis'

  seqs_with_EST, putative_seqs = MyWorkerManagerEst.get_array_seqs
  unless seqs_with_EST.empty?
    # analysis_over_DB_annotated_seqs takes six arguments; the last one
    # (pfam_clustering) was missing here. Pass false: cluster by hit id only.
    # NOTE(review): that function opens reptrans_fasta with mode 'w' — confirm
    # this does not discard representatives written earlier to the same file.
    analysis_over_DB_annotated_seqs(seqs_with_EST, reptrans_fasta, cluster_EST_annotated_path, stats_hash, 'est_annotated', false)
  end
  return putative_seqs
end
do_makeblastdb(seqs, output, dbtype) click to toggle source
# File lib/full_lengther_next/handle_db.rb, line 35
# Builds a BLAST database by piping +seqs+ into the makeblastdb program.
#
# seqs   - FASTA text written to makeblastdb's standard input.
# output - database path (-out); its basename is used as the title.
# dbtype - value for -dbtype.
#
# The command is given as an argument array so no shell is involved and paths
# containing spaces or shell metacharacters are passed through untouched.
# Whatever makeblastdb prints is echoed to standard output.
def do_makeblastdb(seqs, output, dbtype)
  cmd = ['makeblastdb', '-in', '-', '-out', output.to_s, '-title', File.basename(output.to_s),
         '-dbtype', dbtype.to_s, '-parse_seqids']
  IO.popen(cmd, 'w+') do |makedb|
    makedb.sync = true
    makedb.write(seqs)
    makedb.close_write # signals end of input so makeblastdb can finish
    puts makedb.readlines
    makedb.close_read
  end
end
filter_hits(query, select_hits=10) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 4
# Selects the best hits of +query+.
#
# The HSPs are clustered per subject, the clusters at indexes 0..select_hits
# are kept (an inclusive range, i.e. select_hits + 1 clusters) and then
# filtered by identity and by subject coverage. When nothing survives, the
# search is repeated with a window 10 clusters larger; once the window covers
# every HSP or reaches 100, the first cluster is returned on its own.
# A query whose first hit is nil is returned unfiltered.
def filter_hits(query, select_hits=10) # Select best hits
  hits = query.hits
  unless hits.first.nil?
    hits = cluster_hsps(hits)[0..select_hits]
    hits = select_hits_by_coverage_subject(select_hits_by_identity_query(hits))
  end
  return hits unless hits.empty?

  # Stop condition of the recursion
  if select_hits >= query.hits.length || select_hits >= 100
    [cluster_hsps(query.hits).first]
  else
    filter_hits(query, select_hits + 10)
  end
end
find_hit(hit_acc, ar_hits) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 317
# Returns the first hit (array of HSPs) in +ar_hits+ whose first HSP has the
# accession +hit_acc+, or nil when there is none.
def find_hit(hit_acc, ar_hits)
  ar_hits.find { |hit| hit.first.acc == hit_acc }
end
get_coverage_subject(hit) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 22
# Returns the percentage of the subject covered by the alignment
# (align_len * 100.0 / s_len).
#
# For ExoBlastHit objects with frameshifts, a coverage above 100 is corrected:
# a frameshift can add extra positions to the alignment, so the alignment
# length is reduced by 1, 2, ... up to the number of frameshifts until the
# coverage is no longer above 100. If it never gets there, the last corrected
# value is returned.
def get_coverage_subject(hit)
  coverage = hit.align_len * 100.0 / hit.s_len
  needs_correction = coverage > 100 && hit.class.to_s == 'ExoBlastHit' && !hit.q_frameshift.empty?
  return coverage unless needs_correction

  1.upto(hit.q_frameshift.length) do |removed|
    coverage = (hit.align_len - removed) * 100.0 / hit.s_len
    break if coverage <= 100
  end
  coverage
end
go_for_graph(sequences_by_ontologies, fpkm = {}) click to toggle source
# File lib/full_lengther_next/go_methods.rb, line 1
# Builds, per GO ontology, the table used to draw a graph.
#
# sequences_by_ontologies - Hash of GO name (prefixed with 'F:', 'C:' or 'P:')
#                           => array of sequence names.
# fpkm                    - optional Hash of sequence name => array whose
#                           first element is added up per GO term. When empty,
#                           the number of sequences is used instead.
#
# Returns a Hash with the keys :function_go, :component_go and :process_go.
# Each value is a table whose first row is [key_as_string, 'GO'], followed by
# [go_label, value] rows sorted by descending value. With fpkm, terms whose
# total is not above 0 are left out. An ontology without rows gets the single
# data row ['No_data', 1].
def go_for_graph(sequences_by_ontologies, fpkm = {})
  ontologies = [
    [:function_go, 'F:'],
    [:component_go, 'C:'],
    [:process_go, 'P:']
  ]

  container = {}
  ontologies.each do |key, prefix|
    go_table = []
    sequences_by_ontologies.each do |go_name, seq_names|
      next unless go_name.start_with?(prefix)

      # Strip only the leading ontology prefix; the rest of the name is kept
      # even if it happens to contain the same two characters again.
      go_label = go_name[prefix.length..-1]
      if fpkm.empty?
        go_table << [go_label, seq_names.length]
      else
        # Seeded with 0 so an empty sequence list yields 0 instead of nil.
        total = seq_names.inject(0) { |acc, seq_name| acc + fpkm[seq_name].first }
        go_table << [go_label, total] if total > 0
      end
    end

    header = [key.to_s, 'GO']
    if go_table.empty?
      container[key] = [header, ['No_data', 1]]
    else
      go_table.sort! { |row1, row2| row2[1] <=> row1[1] }
      container[key] = go_table.unshift(header)
    end
  end
  return container
end
hsps_relationship_subject(hit) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 271
# Lists the pairs of HSPs of +hit+ that same_subject_hsp reports as related.
# Each pair [hsp, second_hsp] is stored once: it is skipped when the same pair
# or its inverse is already present. Returns an array of pairs.
def hsps_relationship_subject(hit)
  related = []
  hit.each_with_index do |hsp, j|
    hit.each_with_index do |second_hsp, i|
      next if i == j # same HSP
      next unless same_subject_hsp(hsp, second_hsp)

      known = related.include?([hsp, second_hsp]) || related.include?([second_hsp, hsp])
      related << [hsp, second_hsp] unless known
    end
  end
  related
end
load_cd_hit_sequences_names(file) click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 120
# Reads a FASTA file and returns its header lines (those starting with '>')
# as an array of strings, with the line ending and every '>' character
# removed. The file is read line by line and closed when done.
def load_cd_hit_sequences_names(file)
  names = []
  File.foreach(file) do |line|
    names << line.chomp.delete('>') if line.start_with?('>')
  end
  return names
end
load_isoform_hash(file) click to toggle source
# File lib/full_lengther_next/handle_db.rb, line 3
# Reads a FASTA file through ScbiZcatFile and feeds every record (header line
# and concatenated sequence lines) to load_seq_in_hash.
#
# Returns the filled Hash; it is empty when +file+ does not exist or contains
# no header line.
def load_isoform_hash(file)
  isoform_hash = {}
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  return isoform_hash unless File.exist?(file)

  fasta = ScbiZcatFile.new(file)
  seq_name = nil
  seq = ''
  until fasta.eof
    line = fasta.readline.chomp
    if line[0] == '>'
      # A new header closes the previous record, if there was one.
      load_seq_in_hash(seq_name, seq, isoform_hash) unless seq_name.nil?
      seq_name = line
      seq = ''
    else
      seq << line
    end
  end
  # Store the last record; skipped when the file had no header at all.
  load_seq_in_hash(seq_name, seq, isoform_hash) unless seq_name.nil?
  return isoform_hash
end
load_seq_in_hash(seq_name, seq, isoform_hash) click to toggle source
# File lib/full_lengther_next/handle_db.rb, line 25
# Stores one FASTA record in +isoform_hash+.
#
# The first word of +seq_name+ is matched against "db|ACC-N|": the accession
# without the "-N" suffix is the hash key, and the matched "db|ACC-N|" text
# followed by the rest of the header is the stored record header. Records that
# share a key are concatenated, separated by a newline. When the name does not
# match, the key and the matched text are both nil.
def load_seq_in_hash(seq_name, seq, isoform_hash)
  name, desc = seq_name.split(' ', 2)
  match = /(\w+\|(\w+)\-\d+\|)/.match(name)
  header_id = match ? match[1] : nil
  accession = match ? match[2] : nil
  record = ">#{header_id}#{desc}\n#{seq}"
  if isoform_hash[accession].nil?
    isoform_hash[accession] = record
  else
    isoform_hash[accession] += "\n#{record}"
  end
end
misassembled_detection(query) click to toggle source

DETECTION FUNCTIONS

# File lib/full_lengther_next/blast_functions.rb, line 192
# Detects whether +query+ looks misassembled.
#
# The HSPs of the query are grouped per subject with cluster_hsps. A subject
# counts as misassembled when it has more than one HSP and they are a mix of
# negative and non-negative query frames.
#
# Returns true when more than half of the subjects are misassembled (the hit
# list is left untouched). Otherwise returns false and removes from query.hits
# every HSP that belongs to a misassembled subject, so later analysis is not
# broken by them.
def misassembled_detection(query)
  hits = cluster_hsps(query.hits)
  misassembled_accs = []
  hits.each do |hit|
    next unless hit.length > 1

    negative = hit.count { |hsp| hsp.q_frame < 0 }
    misassembled_accs << hit.first.acc if negative > 0 && negative != hit.length
  end

  return true if misassembled_accs.length * 1.0 / hits.length > 0.5

  # reject! filters in one pass instead of deleting while iterating.
  query.hits.reject! { |hsp| misassembled_accs.include?(hsp.acc) }
  return false
end
multiple_hsps(query, num) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 216
# Returns true when at least +num+ HSPs of +query+ have the same accession as
# its first HSP, false otherwise.
def multiple_hsps(query, num)
  # first.acc is read inside the block on purpose: with an empty hit list the
  # block never runs, so no nil is dereferenced.
  query.hits.count { |hsp| hsp.acc == query.hits.first.acc } >= num
end
overlapping_hsps_on_subject(query) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 225
# Checks every subject of +query+ for HSPs that overlap on the subject.
#
# Consecutive HSPs with the same accession are grouped and each group is
# passed to clean_subject_overlapping_hsps, which also collects the HSPs to
# keep. query.hits is replaced by the collected HSPs.
#
# Returns [query, overlapping], where overlapping is true when ANY group was
# reported as overlapping (previously only the last group's flag survived) and
# false otherwise.
def overlapping_hsps_on_subject(query)
  overlapping = false
  cleaned_hits = []
  complete_hit = []
  current_acc = query.hits.first.acc
  query.hits.each do |hsp|
    if hsp.acc != current_acc
      # Accession changed: the previous group is complete.
      _, group_overlaps = clean_subject_overlapping_hsps(complete_hit, cleaned_hits)
      overlapping = true if group_overlaps
      complete_hit = []
    end
    complete_hit << hsp
    current_acc = hsp.acc
  end
  # Last group
  _, group_overlaps = clean_subject_overlapping_hsps(complete_hit, cleaned_hits)
  overlapping = true if group_overlaps
  query.hits = cleaned_hits
  return query, overlapping
end
reduce_pool_sequences(putative_seqs, main_path, cpu) click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 88
# Reduces redundancy of +putative_seqs+ with cd-hit.
#
# putative_seqs - sequences to reduce.
# main_path     - directory where temp.fasta, temp_cln.fasta and the cd-hit
#                 log (log_cd_hit_Cod_Unk) are written.
# cpu           - value passed to cd-hit's -T option.
#
# cd-hit is only run when temp_cln.fasta does not exist yet, so a previous
# result in main_path is reused. Returns the sequences of +putative_seqs+
# whose name appears in temp_cln.fasta.
def reduce_pool_sequences(putative_seqs, main_path, cpu)
  temp_fasta = File.join(main_path, 'temp.fasta')
  temp_fasta_clean = File.join(main_path, 'temp_cln.fasta')
  log_file = File.join(main_path, 'log_cd_hit_Cod_Unk')
  write_fasta(putative_seqs, temp_fasta, 'w')
  $LOG.info "Start cd-hit with coding and unknow sequences"
  # File.exist? replaces File.exists? (removed in Ruby 3.2). The command is
  # given as separate arguments, with stdout redirected to the log file, so
  # paths with spaces or shell metacharacters are handled correctly.
  unless File.exist?(temp_fasta_clean)
    system('cd-hit', '-i', temp_fasta, '-o', temp_fasta_clean, '-c', '0.95', '-M', '0', '-T', cpu.to_s, :out => log_file)
  end
  $LOG.info "Ended cd-hit with coding and unknow sequences"
  cd_hit_names_putative_seqs = load_cd_hit_sequences_names(temp_fasta_clean)
  return select_seqs_with_name(putative_seqs, cd_hit_names_putative_seqs)
end
report_clustering(cluster_file_path, clusters_seqs_annot_DB, representative_seqs_annot_DB) click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 79
# Writes the cluster report: one line per representative, holding its name, a
# tab, and the ';'-joined names of the sequences in the cluster at the same
# index.
#
# cluster_file_path            - output path (overwritten).
# clusters_seqs_annot_DB       - array of clusters (arrays of sequences).
# representative_seqs_annot_DB - representatives, index-aligned with clusters.
#
# The block form of File.open closes the file even if writing raises.
# Returns nil.
def report_clustering(cluster_file_path, clusters_seqs_annot_DB, representative_seqs_annot_DB)
  File.open(cluster_file_path, 'w') do |cluster_file|
    representative_seqs_annot_DB.each_with_index do |rep_seq, i|
      cluster_seqs = clusters_seqs_annot_DB[i].map { |seq| seq.seq_name }.join(';')
      cluster_file.puts "#{rep_seq.seq_name}\t#{cluster_seqs}"
    end
  end
  nil
end
reptrans(seqs_annotation_prot, seqs_some_coding ,seqs_unknown, options) click to toggle source

MAIN FUNCTION

# File lib/full_lengther_next/reptrans.rb, line 9
# Builds the representative transcriptome under ./fln_results.
#
# seqs_annotation_prot - sequences clustered with
#                        analysis_over_DB_annotated_seqs ('prot_annotated').
# seqs_some_coding     - putative coding sequences.
# seqs_unknown         - unknown sequences; only used when an EST database is
#                        configured.
# options              - options hash; :workers, :high_clustering and :est_db
#                        are read here.
#
# Steps: cluster the protein-annotated sequences; when options[:est_db] is
# set, pool coding and unknown sequences, reduce them with cd-hit, build the
# EST BLAST database if its .nsq file is missing and run the EST analysis;
# finally keep the remaining sequences longer than 500, sort them and append
# them to the representative FASTA. Statistics are written at the end.
def reptrans(seqs_annotation_prot, seqs_some_coding, seqs_unknown, options)
  cpus = count_cpu(options)
  stats_hash = initialize_stats_hash_reptrans

  # Paths
  #---------------------------------------------
  main_path = File.join(Dir.pwd, 'fln_results')
  reptrans_fasta = File.join(main_path, 'Representative_transcriptome.fasta')
  blast_path = File.join(main_path, 'ESTdb')
  cluster_prot_annotated_path = File.join(main_path, 'Prot_clusters')
  cluster_EST_annotated_path = File.join(main_path, 'EST_clusters')
  html_file = File.join(main_path, 'Representative_transcriptome_stats.html')
  txt_file = File.join(main_path, 'Representative_transcriptome_stats.txt')

  # Prot annotations sequence analysis
  #---------------------------------------------
  analysis_over_DB_annotated_seqs(seqs_annotation_prot, reptrans_fasta, cluster_prot_annotated_path, stats_hash, 'prot_annotated', options[:high_clustering])
  seqs_annotation_prot = nil # drop the local reference once processed

  # NOT Prot annotations sequence analysis
  #---------------------------------------------
  putative_seqs = seqs_some_coding
  unless options[:est_db].nil? # WITH EST DATABASE
    putative_seqs += seqs_unknown # Coding & unknown
    putative_seqs = reduce_pool_sequences(putative_seqs, main_path, cpus)
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    unless File.exist?(blast_path + '.nsq')
      $LOG.info "Start makeblastdb over EST DB"
      system("makeblastdb -in #{options[:est_db]} -out #{blast_path} -dbtype nucl -parse_seqids > #{File.join(main_path, 'log_makeblast_db')}")
      $LOG.info "Ended makeblastdb over EST DB"
    end
    putative_seqs = do_blast_with_EST(putative_seqs, options, reptrans_fasta, blast_path, cluster_EST_annotated_path, stats_hash)
  end

  # Coding sequence analysis
  #---------------------------------------------
  if !putative_seqs.nil? && !putative_seqs.empty?
    putative_seqs = select_seqs_more_500pb(putative_seqs)
    putative_seqs = reduce_pool_sequences(putative_seqs, main_path, cpus) if options[:est_db].nil? # NOT EST database
    # Order by testcode (descending) and, on ties, by sequence length (descending)
    putative_seqs.sort! do |s1, s2|
      if s2.t_code == s1.t_code
        s2.fasta_length <=> s1.fasta_length
      else
        s2.t_code <=> s1.t_code
      end
    end
    putative_seqs.each { |coding_seq| coding_stats_reptrans(coding_seq, stats_hash) }

    write_fasta(putative_seqs, reptrans_fasta, 'a')
  end
  write_reptrans_stats(stats_hash, html_file, txt_file)
end
same_query_hsp(hit, second_hit) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 117
# Tells whether +second_hit+ lies inside +hit+ on the query.
#
# Returns true when both HSPs have the same accession, hit starts at or before
# second_hit, hit ends at or after second_hit, and second_hit starts more than
# one position away from hit's end. Returns false otherwise.
#
# The end check used to compare hit.q_end with itself, which is always true;
# it now compares against second_hit.q_end.
def same_query_hsp(hit, second_hit)
  return false unless hit.acc == second_hit.acc

  hit.q_beg <= second_hit.q_beg &&
    hit.q_end >= second_hit.q_end &&
    (second_hit.q_beg - hit.q_end).abs > 1
end
same_sense?(hit, second_hit) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 127
# Returns true when the query frames of both HSPs have the same sign
# (both negative, both zero or both positive), false otherwise.
def same_sense?(hit, second_hit)
  (hit.q_frame <=> 0) == (second_hit.q_frame <=> 0)
end
same_subject_hsp(hit, second_hit) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 107
# Tells whether +second_hit+ lies inside +hit+ on the subject.
#
# Returns true when both HSPs have the same accession, hit starts at or before
# second_hit, hit ends at or after second_hit, and second_hit starts more than
# one position away from hit's end. Returns false otherwise.
#
# The end check used to compare hit.s_end with itself, which is always true;
# it now compares against second_hit.s_end.
def same_subject_hsp(hit, second_hit)
  return false unless hit.acc == second_hit.acc

  hit.s_beg <= second_hit.s_beg &&
    hit.s_end >= second_hit.s_end &&
    (second_hit.s_beg - hit.s_end).abs > 1
end
select_hits_by_coverage_subject(hits) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 34
# Keeps the hits that have at least one HSP whose subject coverage is not
# above 100 and reaches the threshold. The threshold is the coverage of the
# first HSP of the first hit, capped at 100.
#
# hits - non-empty array of hits, each an array of HSPs.
#
# Returns a new array with the selected hits, in their original order.
def select_hits_by_coverage_subject(hits)
  threshold = get_coverage_subject(hits.first.first)
  threshold = 100 if threshold > 100

  hits.select do |hit|
    hit.any? do |hsp|
      coverage = get_coverage_subject(hsp)
      coverage <= 100 && coverage >= threshold
    end
  end
end
select_hits_by_evalue(hits, evalue) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 68
# Keeps the hits that have at least one HSP with an e-value lower than or
# equal to +evalue+.
#
# hits   - array of hits, each an array of HSPs.
# evalue - maximum e-value accepted.
#
# Returns a new array in which every selected hit appears exactly once
# (a hit used to be added once per qualifying HSP).
def select_hits_by_evalue(hits, evalue)
  hits.select { |hit| hit.any? { |hsp| hsp.e_val <= evalue } }
end
select_hits_by_identity_query(hits) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 54
# Keeps the hits that have at least one HSP whose identity reaches the
# identity of the first HSP of the first hit.
#
# hits - non-empty array of hits, each an array of HSPs.
#
# Returns a new array with the selected hits, in their original order.
def select_hits_by_identity_query(hits)
  reference_identity = hits.first.first.ident
  hits.select { |hit| hit.any? { |hsp| hsp.ident >= reference_identity } }
end
select_hsps_by_id(hits, selected_ids) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 80
# Returns the HSPs of +hits+ whose accession is included in +selected_ids+,
# in their original order.
def select_hsps_by_id(hits, selected_ids)
  hits.select { |hsp| selected_ids.include?(hsp.acc) }
end
select_representative(clusters_seqs_annot_prot) click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 188
# Picks one representative sequence per cluster.
#
# For each cluster:
# 1. When the first sequence has coverage data, the cluster is filtered IN
#    PLACE to the sequences whose coverage_analysis[3] is within 0.05 of the
#    cluster maximum.
#    NOTE(review): the original comment calls this a "relaxed limit of 5%";
#    that presumes coverage_analysis[3] is a 0..1 fraction — confirm.
# 2. The longest sequence of type COMPLETE is chosen.
# 3. Without COMPLETE sequences, the cluster is sorted IN PLACE by descending
#    get_pident and the longest sequence with the best identity is chosen.
#
# "Longest" is decided by seq_fasta.length; the sequences used to be compared
# as strings, which orders them alphabetically instead of by length.
#
# Returns an array with one representative per cluster, in cluster order.
def select_representative(clusters_seqs_annot_prot)
  clusters_seqs_annot_prot.map do |cluster|
    unless cluster.first.coverage_analysis.empty? # filtering by mapping coverage
      min_coverage = cluster.map { |seq| seq.coverage_analysis[3] }.max - 0.05
      cluster.select! { |seq| seq.coverage_analysis[3] >= min_coverage }
    end

    full_lengths = cluster.select { |seq| seq.type == COMPLETE }
    representative = full_lengths.max_by { |seq| seq.seq_fasta.length }
    if representative.nil?
      cluster.sort! { |cl1, cl2| cl2.get_pident <=> cl1.get_pident }
      best_pident = cluster.first.get_pident
      best_matches = cluster.select { |seq| seq.get_pident == best_pident }
      representative = best_matches.max_by { |seq| seq.seq_fasta.length }
    end
    representative
  end
end
select_seqs_more_500pb(seqs_array) click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 132
# Returns the sequences of +seqs_array+ whose fasta_length is strictly
# greater than 500.
def select_seqs_more_500pb(seqs_array)
  seqs_array.select do |seq|
    seq.fasta_length > 500
  end
end
select_seqs_with_name(array_seqs, array_names) click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 137
# Returns the sequences of +array_seqs+ whose seq_name is listed in
# +array_names+, keeping their original order.
def select_seqs_with_name(array_seqs, array_names)
  # Index the names once: a Hash lookup per sequence instead of scanning the
  # whole names array for each one.
  wanted = {}
  array_names.each { |name| wanted[name] = true }
  array_seqs.select { |seq| wanted.key?(seq.seq_name) }
end
set_thresold_evalue(hits) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 90
# Derives an e-value threshold from the hits.
#
# The smallest e-value that is neither 0 nor >= 100 is taken. With exp being
# the integer part of |log10| of that value, the threshold is
# 10 ** -(exp - ceil(exp / 10.0)), i.e. the exponent relaxed by about a tenth.
# Returns 0 when no hit has a usable e-value.
def set_thresold_evalue(hits)
  usable = hits.map { |hit| hit.e_val }.select { |e_val| e_val != 0 && e_val < 100 }
  return 0 if usable.empty?

  exp = Math.log10(usable.min).abs.to_i
  min_exp = (exp / 10.0).ceil
  10.0**-(exp - min_exp)
end
subject_overlapping_hsps(hit) click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 254
# Checks whether the HSPs of +hit+ overlap on the subject.
#
# When hsps_relationship_subject finds related HSPs, the HSPs with identity
# below 55 are removed (clean_hsp_by_identity) and the check is repeated on
# what is left.
#
# Returns [hit, overlapping]: hit is the possibly filtered HSP array and
# overlapping is true when the filter removed every HSP or the remaining HSPs
# are still related, false otherwise.
def subject_overlapping_hsps(hit)
  return [hit, false] if hsps_relationship_subject(hit).empty?

  hit = clean_hsp_by_identity(hit, 55)
  overlapping = hit.empty? || !hsps_relationship_subject(hit).empty?
  return hit, overlapping
end
write_fasta(seqs_array, file_name, mode) click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 142
# Writes sequences in FASTA format.
#
# seqs_array - sequences responding to seq_name and seq_fasta.
# file_name  - output path.
# mode       - File.open mode, e.g. 'w' to overwrite or 'a' to append.
#
# The block form of File.open closes the file even if writing raises.
# Returns nil.
def write_fasta(seqs_array, file_name, mode)
  File.open(file_name, mode) do |file|
    seqs_array.each do |seq|
      file.puts ">#{seq.seq_name}\n#{seq.seq_fasta}"
    end
  end
  nil
end