class Sequence

Attributes

area_without_annotation[RW]
coverage_analysis[RW]
db[RW]
db_name[RW]
fasta_length[RW]
fpkm[RW]
functional_annotations[RW]
hit[RW]
id[RW]
ignore[RW]
orfs[RW]
save_fasta[RW]
seq_aa[RW]
seq_fasta[RW]
seq_name[RW]
seq_nt[RW]
status[RW]
t_code[RW]
type[RW]

Public Class Methods

new(seq_name, seq_fasta, seq_qual='') click to toggle source
# File lib/full_lengther_next/sequence.rb, line 10
# Builds a sequence record that starts out unclassified (UNKNOWN) and putative.
#
# seq_name::  identifier stored in @seq_name.
# seq_fasta:: sequence string; its length is cached in @fasta_length.
# seq_qual::  accepted so existing callers keep working; not used here.
#
# Uses the +true+/+false+ literals: the TRUE/FALSE constants were deprecated
# in Ruby 2.4 and removed in Ruby 3.2.
def initialize(seq_name, seq_fasta, seq_qual='')
  @seq_name = seq_name
  @seq_fasta = seq_fasta
  @fasta_length = seq_fasta.length
  @db_name = nil
  @seq_nt = nil # Unigen sequence with tagged ATG & stop
  @seq_aa = nil # Protein sequence generated over unigen
  @db = nil
  @type = UNKNOWN # See types.rb
  @status = false # true => Sure, false => Putative
  @id = nil # Prot or EST id, can be several => array
  @warnings = []
  @annotations = []
  @functional_annotations = {}
  @orfs = []

  # Mapping info
  @fpkm = []
  @coverage_analysis = []

  @area_without_annotation = false
  @save_fasta = true
  @ignore = false
  @hit = nil
  @t_code = 0
end

Public Instance Methods

add_orf(orf_seq, orf_t_start, orf_t_end, orf_q_frame, orf_stop_codon, orf_type) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 38
# Wraps the given ORF description in an Orf object and appends it to @orfs.
# Returns @orfs (the result of the append).
def add_orf(orf_seq, orf_t_start, orf_t_end, orf_q_frame, orf_stop_codon, orf_type)
  @orfs.push(Orf.new(orf_seq, orf_t_start, orf_t_end, orf_q_frame, orf_stop_codon, orf_type))
end
all_warns() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 201
# Returns every stored warning message as one space-separated string.
def all_warns
  @warnings.join(' ')
end
area_without_annotation?() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 464
# Reports whether the hit leaves at least 150 positions of the query
# uncovered before its start (q_beg) or after its end (@fasta_length - q_end).
#
# When @hit is an Array only its first element is inspected.
# The flag is sticky: once @area_without_annotation is set to true this
# method never resets it to false.
#
# Uses the +true+ literal: the TRUE constant was removed in Ruby 3.2.
def area_without_annotation?
  reference_hit = @hit.is_a?(Array) ? @hit.first : @hit
  upstream_annotation_space = reference_hit.q_beg
  downstream_annotation_space = @fasta_length - reference_hit.q_end
  if upstream_annotation_space >= 150 || downstream_annotation_space >= 150
    @area_without_annotation = true
  end
  @area_without_annotation
end
calification() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 282
# Returns the human-readable label for @type, or nil when @type matches none
# of the known type constants. Branch order is kept from the original because
# `case` picks the first constant that matches.
def calification
  case @type
  when FAILED       then 'Failed'
  when OTHER        then 'Other'
  when UNMAPPED     then 'Unmapped'
  when CHIMERA      then 'Chimera'
  when MISASSEMBLED then 'Misassembled'
  when UNKNOWN      then 'Unknown'
  when COMPLETE     then 'Complete'
  when N_TERMINAL   then 'N_terminal'
  when C_TERMINAL   then 'C_terminal'
  when INTERNAL     then 'Internal'
  when CODING       then 'Coding'
  when NCRNA        then 'NcRNA'
  end
end
change_degenerated_nt!() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 44
# Replaces degenerate nucleotide codes in the sequence by delegating to
# fix_degenerated_fasta! with a freshly built translation table.
#
# Each table entry is [candidate_nucleotides, usage_counter]; the counter
# starts at 0 and is advanced by fix_degenerated_fasta!.
def change_degenerated_nt!
  candidates = {
    'R' => %w[a g],
    'W' => %w[a t],
    'M' => %w[a c],
    'K' => %w[g t],
    'S' => %w[g c],
    'Y' => %w[c t],
    'H' => %w[a t c],
    'B' => %w[g t c],
    'D' => %w[g a t],
    'V' => %w[g a c],
    'N' => %w[g a c t]
  }

  translate_hash = {}
  candidates.each { |code, nts| translate_hash[code] = [nts, 0] }

  fix_degenerated_fasta!(translate_hash)
end
check_warn(warn) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 128
# Resolves a warning into its final message text.
#
# warn:: either a warning tag, or an Array whose first element is the tag and
#        whose remaining elements are substituted, in order, for successive
#        '(*replace*)' placeholders in the message.
#
# The tag is looked up in $warnings_hash; when no entry exists the tag itself
# is used as the message. Returns the resulting message.
#
# The caller's array is left untouched. The previous implementation called
# warn.shift, which removed the tag from the caller's array, so resolving the
# same array a second time treated the first value as the tag.
def check_warn(warn)
  if warn.is_a?(Array)
    check, *replace = warn # tag first, then the values to substitute
  else
    check = warn
    replace = nil
  end

  message = $warnings_hash[check]
  message = check if message.nil? # no template registered: use the tag itself

  unless replace.nil?
    # Work on a copy so the templates in $warnings_hash are never overwritten.
    message = message.dup
    replace.each do |rep|
      message.sub!('(*replace*)', "#{rep}")
    end
  end
  message
end
clean_annotations() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 96
# Discards all stored annotations by pointing @annotations at a new empty array.
def clean_annotations
  @annotations = Array.new
end
clean_orfs() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 83
# Discards all stored ORFs by pointing @orfs at a new empty array.
def clean_orfs
  @orfs = Array.new
end
clean_warnings() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 92
# Discards all stored warnings by pointing @warnings at a new empty array.
def clean_warnings
  @warnings = Array.new
end
clone() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 478
# Returns a copy of this sequence built from a shallow dup.
#
# The copy gets its own @annotations, @warnings and @orfs arrays: all three
# are emptied on the copy, then warnings and annotations are refilled with
# duplicates of this object's entries. ORFs are not copied over. Every other
# instance variable still refers to the same object as the original.
def clone
  dup.tap do |copy|
    copy.clean_annotations
    copy.clean_warnings
    copy.clean_orfs
    copy.clone_warnings(@warnings)
    copy.clone_annotations(@annotations)
  end
end
clone_annotations(array_annotations) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 488
# Appends a dup of every element of array_annotations to @annotations.
# Returns the map result: one reference to @annotations per appended element.
def clone_annotations(array_annotations)
  array_annotations.map do |annotation|
    @annotations.push(annotation.dup)
  end
end
clone_warnings(array_warnings) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 124
# Appends a dup of every element of array_warnings to @warnings.
# Returns the map result: one reference to @warnings per appended element.
def clone_warnings(array_warnings)
  array_warnings.map do |warning|
    @warnings.push(warning.dup)
  end
end
fix_degenerated_fasta!(translate_hash) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 63
# Rewrites @seq_fasta, replacing each upper-case degenerate code
# (R W M K S Y H B D V N) with one of its candidate nucleotides.
#
# translate_hash:: maps a code to [candidates, counter]. The counter is
#                  incremented before each use and selects the candidate
#                  round-robin, so the first replacement of a code uses
#                  candidates[1]. The counters in the hash are mutated.
#
# All other characters are copied unchanged. Returns the new @seq_fasta.
def fix_degenerated_fasta!(translate_hash)
  resolved = @seq_fasta.each_char.map do |nt|
    next nt unless nt =~ /[RWMKSYHBDVN]/

    entry = translate_hash[nt]
    entry[1] += 1
    entry[0][entry[1] % entry[0].length]
  end

  @seq_fasta = resolved.compact.join
end
format_chimera!() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 110
# Replaces @hit with a new empty array.
def format_chimera!
  @hit = Array.new
end
get_acc() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 100
# Returns the acc value of the current hit (read through the hit accessor).
def get_acc
  hit.acc
end
get_fasta(seq) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 157
# Formats seq as a two-line FASTA record headed by this sequence's name.
# No trailing newline is added.
def get_fasta(seq)
  ">#{@seq_name}\n#{seq}"
end
get_pident() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 105
# Returns the ident value of the current hit (read through the hit accessor).
def get_pident
  hit.ident
end
print_alignment(aa_align, nt_align, color, show_nts, mark_subzone = nil) click to toggle source
prot_annot_calification() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 313
# Returns the type label from calification followed by a space and the
# certainty: 'Sure' when @status is truthy, 'Putative' otherwise.
def prot_annot_calification
  certainty = @status ? 'Sure' : 'Putative'
  "#{calification} #{certainty}"
end
reduce_coordinates(subzone_align, aa_align, h) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 454
# Shifts the first two elements of every subzone in subzone_align down by
# 3 * aa_align.length + h.q_frame - 1, modifying the arrays in place.
#
# Does nothing and returns nil when subzone_align is nil; otherwise returns
# the array of subzone values.
def reduce_coordinates(subzone_align, aa_align, h)
  return if subzone_align.nil?

  consumed = (3 * aa_align.length) + (h.q_frame - 1)
  subzone_align.values.each do |zone|
    zone[0] -= consumed
    zone[1] -= consumed
  end
end
reset_classification() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 87
# Returns the sequence to its initial classification: type UNKNOWN and a
# putative (false) status.
#
# Uses the +false+ literal: the FALSE constant was removed in Ruby 3.2.
def reset_classification
  @type = UNKNOWN
  @status = false
end
show_alignment(h, nts, show_nts, original_query_coordinates = nil) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 323
# Prints to stdout a summary of hit +h+ followed by the query split into up
# to three segments (5-prime, aligned core, 3-prime), each rendered through
# print_alignment.
#
# h::       hit object; this method reads acc, align_len, s_len, q_frame,
#           q_seq, q_beg and q_end from it.
# nts::     nucleotide sequence of the query; must respond to #translate on
#           its slices (not core Ruby String — presumably a BioRuby or project
#           extension, TODO confirm).
# show_nts:: forwarded unchanged to print_alignment.
# original_query_coordinates:: optional; when given, its first/last values are
#           compared with h.q_beg/h.q_end to build the 'beg'/'end' subzones
#           passed to print_alignment.
def show_alignment(h, nts, show_nts, original_query_coordinates = nil)
        # Header: hit accession, lengths (query length is nts.length/3) and classification
        puts "Prot id:\t#{h.acc}", "Alignment length:\t#{h.align_len} aa", "Subject   length:\t#{h.s_len} aa", "Query length:\t#{nts.length/3} aa"
        puts prot_annot_calification
        puts

        # Translate the query from the hit's frame to the end of the sequence
        aa_unigen = nts[h.q_frame - 1 .. nts.length-1].translate
        # NOTE(review): presumably the offset of h.q_seq inside aa_unigen — confirm in contenidos_en_prot
        index = contenidos_en_prot(h.q_seq, aa_unigen)

        # View displacements 5-prime/align/3-prime
        # Each subzone is [start, end, code]; the code (42, 46 or 43) is passed on to
        # print_alignment — presumably an ANSI background color, TODO confirm.
        subzone_align = nil
        if !original_query_coordinates.nil?
                subzone_align = {}
                if h.q_beg > original_query_coordinates.first #alignment has transferred characters to 5 prime
                        subzone_align['beg'] = [original_query_coordinates.first, h.q_beg-3, 42] # -3 to exclude the last aa
                elsif h.q_beg < original_query_coordinates.first
                        subzone_align['beg'] = [h.q_beg, original_query_coordinates.first-3, 46] #alignment has received characters from 5 prime
                end

                if h.q_end < original_query_coordinates.last #alignment has transferred characters to 3 prime
                        subzone_align['end'] = [h.q_end, original_query_coordinates.last, 42]
                elsif h.q_end > original_query_coordinates.last
                        subzone_align['end'] = [original_query_coordinates.last, h.q_end, 43] #alignment has received characters from 3 prime
                end
        end

        # Print 5 prime
        if index > 0 # 5 prime  exists
                aa_align = aa_unigen[0 .. index-1].split('')
                nt_align = nts[h.q_frame-1..h.q_beg-1]
                print_alignment(aa_align, nt_align, 36, show_nts, subzone_align)
                # Rebase the subzone coordinates past the segment just printed
                reduce_coordinates(subzone_align, aa_align, h)
        end

        # Print core alignment or protein
        aa_align = h.q_seq.split('')
        nt_align = nts[h.q_beg..h.q_end]
        print_alignment(aa_align, nt_align, 32, show_nts, subzone_align)
        reduce_coordinates(subzone_align, aa_align, h)

        # Print 3 prime
        # Gap characters in q_seq do not correspond to residues of aa_unigen, so they
        # are subtracted when locating where the 3-prime segment starts.
        gaps = h.q_seq.count('-')
        three_prime_beg = index+h.q_seq.length-gaps
        if aa_unigen.length > three_prime_beg # 3 prime  exists
                aa_align = aa_unigen[three_prime_beg .. aa_unigen.length-1].split('')
                # NOTE(review): fs looks like a frame-shift offset in nucleotides — confirm in check_frame_shift
                fs = check_frame_shift(h)
                nt_align = nts[h.q_end+1-fs..nts.length-1]
                print_alignment(aa_align, nt_align, 33, show_nts, subzone_align)
        end

end
test_code(test_code) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 150
# Stores the given score in @t_code and marks the classification as sure
# (@status = true) when the score is at least 0.95. A lower score leaves
# @status unchanged.
#
# Uses the +true+ literal: the TRUE constant was removed in Ruby 3.2.
def test_code(test_code)
  @t_code = test_code
  @status = true if @t_code >= 0.95
end
unmapped?() click to toggle source
# File lib/full_lengther_next/sequence.rb, line 492
# True when coverage data exists and its element at index 3 (percentage of
# the sequence covered by reads) equals 0; false otherwise.
#
# Returns the boolean expression directly instead of going through the
# TRUE/FALSE constants, which were removed in Ruby 3.2.
def unmapped?
  !@coverage_analysis.empty? && @coverage_analysis[3] == 0
end
warnings(warn) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 114
# Resolves one warning, or each element of an Array of warnings, through
# check_warn and appends the resulting messages to @warnings.
#
# Returns @warnings for a single warning, or the given array for an Array.
def warnings(warn)
  return @warnings << check_warn(warn) unless warn.class.to_s == 'Array'

  warn.each { |single| @warnings << check_warn(single) }
end
write_align(file) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 265
# Writes the query/subject alignment pair to file, followed by a blank line:
# the sequence name and the hit's q_seq on the first line, the hit's acc and
# s_seq on the second.
#
# The name is followed by one tab per full 8 characters of its length (at
# least one); the acc line gets one extra tab when the name is longer than
# 7 characters. The division is integer division, so the tab count rounds
# down exactly as before.
def write_align(file)
  name_length = seq_name.length
  tabs = [name_length / 8, 1].max
  second_tab = name_length > 7 ? 1 : 0

  file.puts "#{@seq_name}#{"\t"*tabs}#{@hit.q_seq}\n#{@hit.acc}#{"\t"*(tabs+second_tab)}#{@hit.s_seq}"
  file.puts
end
write_chimera(file) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 214
# Writes one tab-separated line per hit in @hit, then a blank line.
# Query and subject coordinates are written with 1 added to the stored values.
# TODO : write 'SOLVED' tag
def write_chimera(file)
  @hit.each do |fragment|
    file.puts "#{@seq_name}\t#{@fasta_length}\t#{fragment.acc}\t#{@db_name}\t#{fragment.q_frame}\t#{fragment.e_val}\t#{fragment.ident}\t#{fragment.q_beg + 1}\t#{fragment.q_end + 1}\t#{fragment.s_beg + 1}\t#{fragment.s_end + 1}\t#{fragment.definition}"
  end
  file.puts
end
write_coding(file) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 251
# Writes the coding-sequence line: name, length, certainty ('Sure' when
# @status is truthy, otherwise 'Putative'), test-code score, then the last,
# first and second elements of @hit.
# @hit is an array here: 2 => q_frame, 1 => ORF end, 0 => ORF beg.
def write_coding(file)
  certainty = @status ? 'Sure' : 'Putative'
  file.puts "#{@seq_name}\t#{@fasta_length}\t#{certainty}\t#{@t_code}\t#{@hit.last}\t#{@hit.first}\t#{@hit[1]}"
end
write_info(output_files) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 162
# Writes this sequence to the output streams that apply to it.
#
# output_files:: hash of writable streams. The keys read here are 'seqs',
#                'fpkm', 'coverage', 'db', 'prot', 'nts', 'align' and the
#                type constant itself for the single-file types.
#
# FASTA, FPKM and coverage lines are written first when enabled/non-empty;
# then the type-specific writer(s) run. FAILED writes nothing more; any other
# unrecognised type raises a RuntimeError.
def write_info(output_files)
  output_files['seqs'].puts get_fasta(@seq_fasta) if @save_fasta
  output_files['fpkm'].puts "#{@seq_name}\t#{@fpkm.join("\t")}" unless @fpkm.empty?
  unless @coverage_analysis.empty?
    output_files['coverage'].puts "#{@seq_name}\t#{@coverage_analysis.join("\t")}"
  end

  case @type
  when OTHER        then write_other(output_files[@type])
  when CHIMERA      then write_chimera(output_files[@type])
  when UNMAPPED     then write_unmapped(output_files[@type])
  when MISASSEMBLED then write_misassembled(output_files[@type])
  when UNKNOWN      then write_unknown(output_files[@type])
  when COMPLETE .. INTERNAL
    # Protein-annotated types share the same four outputs
    write_prot_annot(output_files['db'])
    write_prot_seq(output_files['prot'])
    write_nt_seq(output_files['nts'])
    write_align(output_files['align'])
  when NCRNA        then write_ncrna(output_files[@type])
  when CODING       then write_coding(output_files[@type])
  else
    raise "#{@type} is an incorrect type" unless @type == FAILED
  end
end
write_misassembled(file) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 221
# Writes the misassembled-sequence line: name, length, hit acc and database name.
def write_misassembled(file)
  record = "#{@seq_name}\t#{@fasta_length}\t#{@hit.acc}\t#{@db_name}"
  file.puts record
end
write_ncrna(file) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 247
# Writes the ncRNA line: name, length and the hit's acc, e_val, ident,
# query/subject coordinates (each stored value plus 1) and definition.
def write_ncrna(file)
  match = @hit
  file.puts "#{@seq_name}\t#{@fasta_length}\t#{match.acc}\t#{match.e_val}\t#{match.ident}\t#{match.q_beg + 1}\t#{match.q_end + 1}\t#{match.s_beg + 1}\t#{match.s_end + 1}\t#{match.definition}"
end
write_nt_seq(file) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 278
# Writes the nucleotide line: name, length and the tagged sequence in @seq_nt.
def write_nt_seq(file)
  record = "#{@seq_name}\t#{@fasta_length}\t#{@seq_nt}"
  file.puts record
end
write_other(file) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 206
# Writes the 'other' line: name, length, hit acc, database name and all warnings.
def write_other(file)
  record = "#{@seq_name}\t#{@fasta_length}\t#{@hit.acc}\t#{@db_name}\t#{all_warns}"
  file.puts record
end
write_prot_annot(file) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 239
# Writes the protein-annotation line for this sequence.
#
# The trailing columns are the values of @functional_annotations, or nine
# '-' placeholders when no functional annotations are stored. Query and
# subject coordinates are written with 1 added to the stored values.
def write_prot_annot(file)
  final_func_annot =
    if @functional_annotations.empty?
      Array.new(9, '-')
    else
      @functional_annotations.values
    end

  file.puts "#{@seq_name}\t#{@fasta_length}\t#{@hit.acc}\t#{@db_name}\t#{prot_annot_calification}\t#{@hit.e_val}\t#{@hit.ident}\t#{@hit.full_subject_length}\t#{@seq_aa.length}\t#{all_warns}\t#{@hit.q_frame}\t#{@hit.q_beg + 1}\t#{@hit.q_end + 1}\t#{@hit.s_beg + 1}\t#{@hit.s_end + 1}\t#{@hit.definition}\t#{final_func_annot.join("\t")}"
end
write_prot_seq(file) click to toggle source

Write complementary files

# File lib/full_lengther_next/sequence.rb, line 261
# Writes the protein sequence (@seq_aa) to file as a FASTA record.
def write_prot_seq(file)
  record = get_fasta(@seq_aa)
  file.puts record
end
write_unknown(file) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 225
# Writes the unknown-sequence line: name, length, test-code score, warnings,
# then frame, ORF begin and ORF end.
#
# When the hit is an Array it is read as 0 => ORF beg, 1 => ORF end,
# 2 => q_frame; otherwise the three ORF columns are written as '-'.
def write_unknown(file)
  orf_beg, orf_end, q_frame =
    if hit.class.to_s == 'Array'
      [@hit[0], @hit[1], @hit[2]]
    else
      ['-', '-', '-']
    end

  file.puts "#{@seq_name}\t#{@fasta_length}\t#{@t_code}\t#{all_warns}\t#{q_frame}\t#{orf_beg}\t#{orf_end}"
end
write_unmapped(file) click to toggle source
# File lib/full_lengther_next/sequence.rb, line 210
# Writes the unmapped-sequence line: name and length.
def write_unmapped(file)
  record = "#{@seq_name}\t#{@fasta_length}"
  file.puts record
end