module FlnStats

Constants

REPORT_FOLDER

Public Instance Methods

add_percentages_by_scalar(table, col, denominator) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 545
# Appends a percentage cell to every data row of +table+, using one common
# denominator for all rows.
#
# table       - Array of rows (Arrays); row 0 is treated as the header and skipped.
# col         - Index of the numeric cell inside each row.
# denominator - Number every value is divided by.
#
# Each data row receives either a string such as "12.50%" or '-' when the
# division yields NaN or an infinite value (e.g. a zero denominator).
# Returns +table+ (rows are modified in place).
def add_percentages_by_scalar(table, col, denominator)
        table.each_with_index do |record, position|
                next if position.zero? # the header row carries no value
                ratio = record[col] * 100.0 / denominator
                cell = if ratio.nan? || ratio.infinite?
                        '-'
                else
                        ('%.2f' % ratio.to_s) + '%'
                end
                record << cell
        end
end
add_percentages_by_vector(table, col, denominators) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 530
# Appends a percentage cell to every data row of +table+, using a per-row
# denominator.
#
# table        - Array of rows (Arrays); row 0 is treated as the header and skipped.
# col          - Index of the numeric cell inside each row.
# denominators - Array of numbers; entry i-1 belongs to table row i.
#
# A row whose denominator is not greater than zero gets '-' and its value is
# never read, so such rows may hold non-numeric cells. Returns +table+.
def add_percentages_by_vector(table, col, denominators)
        table.each_with_index do |record, position|
                next if position.zero? # the header row carries no value
                divisor = denominators[position - 1]
                cell = '-'
                if divisor > 0
                        ratio = record[col] * 100.0 / divisor
                        cell = ('%.2f' % ratio.to_s) + '%' unless ratio.nan? || ratio.infinite?
                end
                record << cell
        end
end
calculate_n50_n90(stats_hash, f_tot_key, n50_key, n90_key, seq_lengths) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 240
# Computes the N50 and N90 of a set of sequence lengths and stores them in
# +stats_hash+.
#
# stats_hash  - Hash of statistics; read at +f_tot_key+, written at +n50_key+
#               and +n90_key+.
# f_tot_key   - Key holding the total length of all sequences.
# n50_key     - Key that receives the N50. It is only written while its
#               current value is 0.
# n90_key     - Key that receives the N90.
# seq_lengths - Array of sequence lengths. It is sorted in place, longest first.
#
# Nx is the length of the sequence at which the running sum of lengths, taken
# from longest to shortest, first reaches x% of the total.
def calculate_n50_n90(stats_hash, f_tot_key, n50_key, n90_key, seq_lengths)
        total_length = stats_hash[f_tot_key].to_f
        cum = 0
        seq_lengths.sort! { |a, b| b <=> a }
        seq_lengths.each do |length|
                cum += length
                # Compare products instead of float ratios so the 50%/90% boundaries are exact.
                if stats_hash[n50_key] == 0 && cum * 2 >= total_length
                        stats_hash[n50_key] = length
                end
                # Checked independently of N50: one sequence may cross both thresholds.
                if cum * 10 >= total_length * 9
                        stats_hash[n90_key] = length
                        break
                end
        end
end
coding_stats_reptrans(coding_seq, stats_hash) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 280
# Increments the counter of the test-code bin that +coding_seq+ falls into.
#
# coding_seq - Object responding to +t_code+ (a number).
# stats_hash - Hash of counters keyed by bin name.
#
# Bins are tried from the highest threshold down and the first match wins.
# A t_code that is not greater than 0 touches no counter.
def coding_stats_reptrans(coding_seq, stats_hash)
        bins = [
                [1,    'coding_>1'],
                [0.95, 'coding_>0.94'],
                [0.85, 'coding_>0.84'],
                [0.73, 'coding_>0.73'],
                [0,    'coding_>0']
        ]
        _limit, group = bins.find { |limit, _name| coding_seq.t_code > limit }
        stats_hash[group] += 1 unless group.nil?
end
get_taxonomy(name, taxonomy) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 67
# Extracts the organism (first two words, e.g. "Homo sapiens") from a hit
# description and increments its counter in +taxonomy+.
#
# name     - String describing the hit. It is not modified.
# taxonomy - Hash mapping organism name => number of annotations; updated in place.
#
# Three description layouts are handled, in this order:
# 1. "... OS=<organism> GN=..."                      -> text between 'OS=' and ' GN='.
# 2. Names starting with 'sp=' or 'tr='              -> "<word> <word> (..)" pattern,
#    preferring the variant followed by two parenthesised groups.
# 3. Anything else                                   -> text after the first ';' and
#    before the first '.', with non-numeric parenthesised groups removed.
#
# Returns the updated count, or nil when no organism could be extracted.
def get_taxonomy(name, taxonomy)
        organism = nil
        if name.include?('OS=')
                organism = name.split('OS=', 2).last.split(' GN=').first.to_s.strip
        elsif name.start_with?('sp=', 'tr=')
                # A real comparison: the previous `name[0..2] = 'sp='` was an assignment that
                # rewrote the caller's string and made the branch below unreachable.
                hit = name.match(/(\w+ \w+) \(([\w ]+)\) \(([\w ]+)\)/) ||
                      name.match(/(\w+ \w+) \(([\w ]+)\)/)
                organism = hit[1] unless hit.nil?
        else
                organism = name.split(';', 2).last.to_s.split('.', 2).first.to_s
                organism = organism.gsub(/\(\D+\)/, '')
                if organism.split(' ').length > 1
                        organism = organism.gsub('.', '').gsub(/^ /, '').gsub('  ', '').strip
                end
        end
        return nil if organism.nil?

        organism = organism.split(' ')[0..1].join(' ')
        return nil if organism.empty? # nothing usable was found; do not count an empty name

        taxonomy[organism] = (taxonomy[organism] || 0) + 1
end
handle_data_main_summary(stats_hash, stats_taxonomy, stats_functional_annotation_by_seqs) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 298
# Builds the data container consumed by the main summary report.
#
# stats_hash                          - Hash of counters/statistics (String keys).
# stats_taxonomy                      - Hash mapping organism name => annotation count.
# stats_functional_annotation_by_seqs - Passed untouched to go_for_graph (defined elsewhere).
#
# Returns a Hash holding whatever go_for_graph returns plus the tables
# :general_report, :assembly_report, :structural_data, :status_report,
# :taxonomy and :database_report. Every table is an Array of rows whose first
# row is the header.
def handle_data_main_summary(stats_hash, stats_taxonomy, stats_functional_annotation_by_seqs)
        container = {}

        # HTML non-breaking spaces used to indent nested row labels
        identation = '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'
        # GENERAL REPORT TABLE
        #-------------------------------------------------------
        general_report = [ 
                ['', 'Sequences', '%'],
                ['Input', stats_hash['input_seqs']],
                [identation + 'N50 (bp)', stats_hash['PRE_FLN_n50']],
                [identation + 'N90 (bp)', stats_hash['PRE_FLN_n90']],
                [identation + 'Full transcriptome length (bp)', stats_hash['PRE_FLN_full_transcriptome_length']],
                [identation + 'Mean sequence length (bp)', '%.2f' % stats_hash['PRE_FLN_mean_length']],
                [identation + 'Nucleotide indeterminations (bp)', stats_hash['PRE_FLN_indeterminations']],
                [identation + 'Mean indetermination length (bp)', '%.2f' % stats_hash['PRE_FLN_indetermination_mean_length']],
                [identation + 'Unigenes >500pb', stats_hash['PRE_FLN_sequences_>500']],
                [identation + 'Failing sequences', stats_hash['failed']],
                [identation + 'Artifacts <sup>1</sup>', stats_hash['artifacts']], 
                [identation*2 + 'Unmapped transcripts', stats_hash['unmapped']], 
                [identation*2 + 'Misassembled', stats_hash['misassembled']], 
                [identation*2 + 'Chimeras', stats_hash['chimeras']],
                [identation*2 + 'Other', stats_hash['other_artifacts']],
                ['Sequences with resolved chimeras', stats_hash['output_seqs']],
                ['Sequences without artifacts', stats_hash['good_seqs']],
                [identation + 'N50 (bp)', stats_hash['n50']],
                [identation + 'N90 (bp)', stats_hash['n90']],
                [identation + 'Full transcriptome length (bp)', stats_hash['full_transcriptome_length']],
                [identation + 'Mean sequence length (bp)', '%.2f' % stats_hash['mean_length']],
                [identation + 'Nucleotide indeterminations (bp)', stats_hash['indeterminations']],
                [identation + 'Mean indetermination length (bp)', '%.2f' % stats_hash['indetermination_mean_length']]
        ]
        # One denominator per data row above, in the same order (22 entries).
        # A literal 0 means "no percentage": add_percentages_by_vector writes '-'
        # for that row without reading its value, which is why the rows holding
        # pre-formatted strings are safe.
        denominators = [ 
                stats_hash['input_seqs'],
                0,
                0,
                0,
                0,
                stats_hash['PRE_FLN_full_transcriptome_length'],
                0,
                stats_hash['input_seqs'],
                stats_hash['output_seqs'],
                stats_hash['output_seqs'],
                stats_hash['artifacts'],
                stats_hash['artifacts'],
                stats_hash['artifacts'],
                stats_hash['artifacts'], 
                stats_hash['input_seqs'],
                stats_hash['output_seqs'],
                0,
                0,
                0,
                0,
                stats_hash['full_transcriptome_length'],
                0
        ]
        add_percentages_by_vector(general_report, 1, denominators)
        # The BA index row is added after the percentages, so it carries its own '-' cell.
        general_report << ['BA index', "%5.2f" % [stats_hash['BA_index']], '-'] if stats_hash['BA_index'] > 0

        # ASSEMBLY REPORT TABLE
        #-------------------------------------------------------
        without_orthologue = stats_hash['coding']+ stats_hash['unknown']
        assembly_report = [
                ['', 'Unigenes', '%'],
                ['Unigenes', stats_hash['good_seqs']],
                ['Unigenes >500pb', stats_hash['sequences_>500']],
                ['Unigenes >200pb', stats_hash['sequences_>200']],
                ['Longest unigene', stats_hash['longest_unigene']],
                ['With orthologue <sup>1</sup>', stats_hash['prot_annotated']],
                [identation + 'Different orthologue IDs', stats_hash['different_orthologues']],
                [identation + 'Complete transcripts', stats_hash['complete']],
                [identation + 'Different complete transcripts', stats_hash['different_completes']],
                ['ncRNA', stats_hash['ncrna']],
                ['Without orthologue <sup>1</sup>', without_orthologue],
                [identation + 'Coding (all)', stats_hash['coding']],
                [identation + 'Coding > 200bp', stats_hash['coding_>200']],
                [identation + 'Coding > 500bp', stats_hash['coding_>500']],
                [identation + 'Unknown (all)', stats_hash['unknown']],
                [identation + 'Unknown > 200bp', stats_hash['unknown_>200']],
                [identation + 'Unknown > 500bp', stats_hash['unknown_>500']]
        ]
        # One denominator per data row above, in the same order (16 entries).
        denominators = [
                stats_hash['good_seqs'],
                stats_hash['good_seqs'],
                stats_hash['good_seqs'],
                0,
                stats_hash['good_seqs'],
                stats_hash['prot_annotated'],
                stats_hash['prot_annotated'],
                stats_hash['prot_annotated'],
                stats_hash['good_seqs'],
                stats_hash['good_seqs'],
                without_orthologue,
                without_orthologue,
                without_orthologue,
                without_orthologue,
                without_orthologue,
                without_orthologue
        ]
        add_percentages_by_vector(assembly_report, 1, denominators)

        # STRUCTURAL PROFILE
        #-------------------------------------------------------
        structural_data = [
                ['Category', 'Sure', 'Putative'],
                ['Unknown', stats_hash['unknown'], 0],
                ['Complete', stats_hash['complete_sure'], stats_hash['complete_putative']],
                ['N-terminal', stats_hash['n_terminal_sure'], stats_hash['n_terminal_putative']], 
                ['C-terminal', stats_hash['c_terminal_sure'], stats_hash['c_terminal_putative']],
                ['Internal', stats_hash['internal'], 0],
                ['ncrna', stats_hash['ncrna'], 0],
                # NOTE(review): the 'Sure' cell uses 'coding' (all coding sequences), whereas the
                # status report below uses 'coding_sure' - confirm whether 'coding_sure' was intended.
                ['Coding', stats_hash['coding'], stats_hash['coding_putative']]
        ]
        # Turn every positive count (skipping header row and label column) into a
        # percentage of 'good_seqs'; zero cells are left as they are.
        structural_data.each_with_index do |row, i|
                row.each_with_index do |field, j|
                        structural_data[i][j] = field*100.0/stats_hash['good_seqs'] if i > 0 && j > 0 && structural_data[i][j] > 0                          
                end
        end

        # STATUS REPORT
        #----------------------------------------------------------
        # The literal cells 'colspan' and 'rowspan' are stored as data in the table;
        # presumably the report template interprets them as layout markers - verify
        # against the template.
        status_report = [
                ['Status', 'colspan', 'Unigenes', '%'],
                ['Complete', 'Sure', stats_hash['complete_sure']],
                ['rowspan', 'Putative', stats_hash['complete_putative']],
                ['C-terminus', 'Sure', stats_hash['c_terminal_sure']],
                ['rowspan', 'Putative', stats_hash['c_terminal_putative']],
                ['N-terminus', 'Sure', stats_hash['n_terminal_sure']],
                ['rowspan', 'Putative', stats_hash['n_terminal_putative']],
                ['Internal', 'colspan', stats_hash['internal']],
                ['Coding', 'Sure', stats_hash['coding_sure']],
                ['rowspan', 'Putative', stats_hash['coding_putative']],
                ['ncRNA', 'colspan', stats_hash['ncrna']],
                ['Unknown', 'colspan', stats_hash['unknown']],
                ['Total', 'colspan', stats_hash['good_seqs']],
        ]
        add_percentages_by_scalar(status_report, 2, stats_hash['good_seqs'])

        # TAXONOMY PROFILE
        #-------------------------------------------------------
        # Organisms sorted by descending annotation count; the [0..20] slice keeps
        # at most 21 entries.
        taxonomy = [
                ['Organism', 'Annotations']
        ].concat(stats_taxonomy.to_a.sort{|s2, s1| s1.last <=> s2.last}[0..20])

        # DATABASE REPORT
        #-------------------------------------------------------
        database_report = [
                ['', 'Unigenes', '%'],
                ['UserDB', stats_hash['userdb']],
                ['SwissProt', stats_hash['swissprot']],
                ['TrEMBL', stats_hash['trembl']],
                ['ncRNA', stats_hash['ncrna']],
                ['None', stats_hash['coding']+ stats_hash['unknown']],
                ['Total', stats_hash['good_seqs']]
        ]
        add_percentages_by_scalar(database_report, 1, stats_hash['good_seqs'])

        # GO ANNOTATION
        #-------------------------------------------------------
        # go_for_graph is defined outside this view; its result is merged in as-is.
        container.merge!(go_for_graph(stats_functional_annotation_by_seqs))

        # BUILD CONTAINER
        #-------------------------------------------------------
        container[:general_report] = general_report
        container[:assembly_report] = assembly_report
        container[:structural_data] = structural_data
        container[:status_report] = status_report
        container[:taxonomy] = taxonomy
        container[:database_report] = database_report
        return container
end
handle_data_reptrans_summary(stats_hash) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 470
# Builds the data container consumed by the reptrans summary report.
#
# stats_hash - Hash of counters with the keys 'prot_annotated', 'est_annotated',
#              'coding_>1', 'coding_>0.94', 'coding_>0.84', 'coding_>0.73' and
#              'coding_>0'. The sum of ALL values in the hash is used as the
#              total number of output sequences.
#
# Returns a Hash with two tables (Arrays of rows, header first):
#   :general_report     - one row per category with its own count and percentage.
#   :acumulative_report - one row per category with the running total up to and
#                         including that category, and its percentage.
def handle_data_reptrans_summary(stats_hash)
        categories = [
                ['Annotated with protein', 'prot_annotated'],
                ['Annotated with EST', 'est_annotated'],
                ['Coding test-code > 1', 'coding_>1'],
                ['Coding test-code > 0.94', 'coding_>0.94'],
                ['Coding test-code > 0.84', 'coding_>0.84'],
                ['Coding test-code > 0.73', 'coding_>0.73'],
                ['Coding test-code > 0', 'coding_>0']
        ]

        # GENERAL REPORT
        #-------------------------------------------------------
        all_seqs = stats_hash.values.inject(0) { |sum, value| sum + value }
        general_report = [
                ['', 'Sequences', '%'],
                ['Output', all_seqs]
        ]
        categories.each { |label, key| general_report << [label, stats_hash[key]] }
        add_percentages_by_scalar(general_report, 1, all_seqs)

        # ACUMULATIVE REPORT
        #-------------------------------------------------------
        # Every category gets a running total, even when it is still zero, so the
        # rows always line up with their labels.
        running_total = 0
        acumulative_report = [['', 'Sequences', '%']]
        categories.each do |label, key|
                running_total += stats_hash[key]
                acumulative_report << [label, running_total]
        end
        add_percentages_by_scalar(acumulative_report, 1, all_seqs)

        # BUILD CONTAINER
        #-------------------------------------------------------
        container = {}
        container[:general_report] = general_report
        container[:acumulative_report] = acumulative_report
        return container
end
initialize_stats_hash() click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 7
# Returns a fresh statistics Hash for a main run: every known counter name
# (String key) mapped to 0, in the order listed below.
def initialize_stats_hash
        counter_names = %w[
                input_seqs
                output_seqs
                failed
                full_transcriptome_length
                PRE_FLN_full_transcriptome_length
                mean_length
                PRE_FLN_mean_length
                indeterminations
                PRE_FLN_indeterminations
                gap_number
                PRE_FLN_gap_number
                indetermination_mean_length
                PRE_FLN_indetermination_mean_length
                sequences_>200
                sequences_>500
                PRE_FLN_sequences_>500
                longest_unigene
                n50
                PRE_FLN_n50
                n90
                PRE_FLN_n90
                good_seqs
                artifacts
                misassembled
                chimeras
                unmapped
                other_artifacts
                unknown
                unknown_>200
                unknown_>500
                prot_annotated
                complete
                complete_sure
                complete_putative
                n_terminal
                n_terminal_sure
                n_terminal_putative
                c_terminal
                c_terminal_sure
                c_terminal_putative
                internal
                swissprot
                trembl
                userdb
                ncrna
                coding
                coding_sure
                coding_putative
                coding_>200
                coding_>500
                different_orthologues
                different_completes
                BA_index
        ]
        counter_names.each_with_object({}) { |counter, stats| stats[counter] = 0 }
end
initialize_stats_hash_reptrans() click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 103
# Returns a fresh statistics Hash for a reptrans run: the two annotation
# counters and the five test-code bins (String keys), all set to 0.
def initialize_stats_hash_reptrans
        counter_names = %w[
                prot_annotated
                est_annotated
                coding_>1
                coding_>0.94
                coding_>0.84
                coding_>0.73
                coding_>0
        ]
        counter_names.each_with_object({}) { |counter, stats| stats[counter] = 0 }
end
last_stats(stats_hash, diff_ids_array, diff_ids_complete_array, pre_fln_seq_lengths, seq_lengths) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 255
# Completes +stats_hash+ with the statistics that can only be computed once
# every sequence has been counted.
#
# stats_hash              - Hash of counters; updated in place and returned.
# diff_ids_array          - Array whose length becomes 'different_orthologues'.
# diff_ids_complete_array - Array whose length becomes 'different_completes'.
# pre_fln_seq_lengths     - Sequence lengths handed to calculate_n50_n90 for the PRE_FLN N50/N90.
# seq_lengths             - Sequence lengths handed to calculate_n50_n90 for the N50/N90.
#
# Means are only written when their divisor is positive, and 'BA_index' only
# when all five of its inputs are positive.
def last_stats(stats_hash, diff_ids_array, diff_ids_complete_array, pre_fln_seq_lengths, seq_lengths)
        stats_hash['different_orthologues'] = diff_ids_array.length
        stats_hash['different_completes'] = diff_ids_complete_array.length

        # [key to write, numerator key, divisor key]
        mean_definitions = [
                ['mean_length', 'full_transcriptome_length', 'good_seqs'],
                ['indetermination_mean_length', 'indeterminations', 'gap_number'],
                ['PRE_FLN_mean_length', 'PRE_FLN_full_transcriptome_length', 'input_seqs'],
                ['PRE_FLN_indetermination_mean_length', 'PRE_FLN_indeterminations', 'PRE_FLN_gap_number']
        ]
        mean_definitions.each do |target, numerator, divisor|
                next unless stats_hash[divisor] > 0
                stats_hash[target] = stats_hash[numerator].to_f / stats_hash[divisor]
        end

        calculate_n50_n90(stats_hash, 'full_transcriptome_length', 'n50', 'n90', seq_lengths)
        calculate_n50_n90(stats_hash, 'PRE_FLN_full_transcriptome_length', 'PRE_FLN_n50', 'PRE_FLN_n90', pre_fln_seq_lengths)

        # BA index: square root of the product of an annotation coefficient and an
        # improvement coefficient.
        ba_inputs = ['prot_annotated', 'complete', 'sequences_>500', 'different_orthologues', 'different_completes']
        if ba_inputs.all? { |key| stats_hash[key] > 0 }
                annotated = stats_hash['prot_annotated']
                completes = stats_hash['complete']
                annotation_coef = (annotated * completes * 1.0) / (stats_hash['sequences_>500'] * 10000)
                improvement_coef = (stats_hash['different_orthologues'] * 1.0 + stats_hash['different_completes']) / (annotated + completes)
                stats_hash['BA_index'] = Math.sqrt(annotation_coef * improvement_coef)
        end

        stats_hash
end
sequence_stats(seq, stats_hash) click to toggle source

Extract sequence stats

# File lib/full_lengther_next/fln_stats.rb, line 118
# Adds one input sequence to the PRE_FLN counters of +stats_hash+.
#
# seq        - Object responding to +seq_fasta+ (the nucleotide String).
# stats_hash - Hash of counters; updated in place.
#
# Updates 'input_seqs', 'PRE_FLN_sequences_>500', 'PRE_FLN_full_transcriptome_length',
# 'PRE_FLN_indeterminations' (number of n/N characters) and 'PRE_FLN_gap_number'
# (number of contiguous n/N runs).
def sequence_stats(seq, stats_hash)
        nt_seq = seq.seq_fasta
        stats_hash['input_seqs'] += 1
        # Strictly longer than 500, as the counter name says and as summary_stats
        # does for the matching 'sequences_>500' counter.
        stats_hash['PRE_FLN_sequences_>500'] += 1 if nt_seq.length > 500
        stats_hash['PRE_FLN_full_transcriptome_length'] += nt_seq.length
        stats_hash['PRE_FLN_indeterminations'] += nt_seq.count('nN')
        stats_hash['PRE_FLN_gap_number'] += nt_seq.scan(/[nN]+/).length
end
summary_stats(seqs, stats_hash, diff_ids_array, diff_ids_complete_array, all_seq_lengths) click to toggle source

Build final stats

# File lib/full_lengther_next/fln_stats.rb, line 129
# Adds a batch of processed sequences to the cumulative statistics.
#
# seqs                    - Array of sequence objects. The code reads +type+, +seq_fasta+,
#                           +fasta_length+, +hit.acc+, +seq_name+, +status+ and +db_name+.
#                           An empty batch is accepted.
# stats_hash              - Hash of counters; updated in place.
# diff_ids_array          - Array collecting the distinct hit accessions of sequences whose
#                           type lies strictly between UNKNOWN and NCRNA; updated in place.
# diff_ids_complete_array - Array collecting the distinct hit accessions of COMPLETE
#                           sequences; updated in place.
# all_seq_lengths         - Array that receives the lengths of the "good" sequences
#                           (type >= UNKNOWN); updated in place.
#
# The type constants (UNKNOWN, FAILED, COMPLETE, ...) are defined outside this method.
#
# Returns [stats_hash, diff_ids_array, diff_ids_complete_array, all_seq_lengths].
def summary_stats(seqs, stats_hash, diff_ids_array, diff_ids_complete_array, all_seq_lengths)
        low_limit = 200
        upper_limit = 500

        #All seqs
        #-----------
        stats_hash['output_seqs'] += seqs.length
        good_seqs = seqs.select { |s| s.type >= UNKNOWN }
        stats_hash['good_seqs'] += good_seqs.length

        #Indeterminations: n/N characters and contiguous n/N runs of the good sequences
        good_seqs.each do |s|
                stats_hash['indeterminations'] += s.seq_fasta.count('nN')
                stats_hash['gap_number'] += s.seq_fasta.scan(/[nN]+/).length
        end

        #Longest unigene (taken over every sequence of the batch)
        current_longest_unigene = seqs.map { |s| s.fasta_length }.max
        # max is nil for an empty batch; nothing to compare in that case
        if !current_longest_unigene.nil? && current_longest_unigene > stats_hash['longest_unigene']
                stats_hash['longest_unigene'] = current_longest_unigene
        end

        #Load ids
        seqs.each do |s|
                diff_ids_array << s.hit.acc if s.type > UNKNOWN && s.type < NCRNA
        end
        diff_ids_array.uniq!

        #By Length
        seq_lengths = good_seqs.map { |s| s.fasta_length }
        all_seq_lengths.concat(seq_lengths)
        stats_hash['full_transcriptome_length'] += seq_lengths.inject(0) { |sum, n| sum + n }
        stats_hash['sequences_>200'] += seq_lengths.count { |l| l > low_limit }
        stats_hash['sequences_>500'] += seq_lengths.count { |l| l > upper_limit }

        stats_hash['failed'] += seqs.count { |s| s.type == FAILED }

        #Unknown
        #-----------------------------
        all_unknown = seqs.select { |s| s.type == UNKNOWN }
        stats_hash['unknown'] += all_unknown.length

        #By Length
        stats_hash['unknown_>200'] += all_unknown.count { |s| s.fasta_length > low_limit }
        stats_hash['unknown_>500'] += all_unknown.count { |s| s.fasta_length > upper_limit }

        #Artifacts
        #----------------
        stats_hash['artifacts'] += seqs.count { |s| s.type < UNKNOWN && s.type > FAILED }
        stats_hash['misassembled'] += seqs.count { |s| s.type == MISASSEMBLED }
        stats_hash['unmapped'] += seqs.count { |s| s.type == UNMAPPED }
        # Pieces named '..._split_...' are skipped so a chimera split into several parts is not counted more than once
        stats_hash['chimeras'] += seqs.count { |s| s.type == CHIMERA && !s.seq_name.include?('_split_') }
        stats_hash['other_artifacts'] += seqs.count { |s| s.type == OTHER }

        #Annotated with prot
        #---------------------
        prot_annotated = seqs.select { |s| s.type >= COMPLETE && s.type <= INTERNAL }
        stats_hash['prot_annotated'] += prot_annotated.length

        #By annotation
        stats_hash['internal'] += seqs.count { |s| s.type == INTERNAL }
        complete = seqs.select { |s| s.type == COMPLETE }
        n_terminal = seqs.select { |s| s.type == N_TERMINAL }
        c_terminal = seqs.select { |s| s.type == C_TERMINAL }

        stats_hash['complete'] += complete.length
        stats_hash['n_terminal'] += n_terminal.length
        stats_hash['c_terminal'] += c_terminal.length

        #Load complete ids
        complete.each { |s| diff_ids_complete_array << s.hit.acc }
        diff_ids_complete_array.uniq!

        #----> By Status (a truthy status counts as "sure", anything else as "putative")
        stats_hash['complete_sure'] += complete.count { |s| s.status }
        stats_hash['n_terminal_sure'] += n_terminal.count { |s| s.status }
        stats_hash['c_terminal_sure'] += c_terminal.count { |s| s.status }
        stats_hash['complete_putative'] += complete.count { |s| !s.status }
        stats_hash['n_terminal_putative'] += n_terminal.count { |s| !s.status }
        stats_hash['c_terminal_putative'] += c_terminal.count { |s| !s.status }

        #By database: anything that is neither 'sp_*' nor 'tr_*' is counted as user database
        swissprot = prot_annotated.count { |s| s.db_name =~ /^sp_/ }
        trembl = prot_annotated.count { |s| s.db_name =~ /^tr_/ }
        stats_hash['swissprot'] += swissprot
        stats_hash['trembl'] += trembl
        stats_hash['userdb'] += prot_annotated.length - swissprot - trembl

        #ncRNA
        #----------------
        stats_hash['ncrna'] += seqs.count { |s| s.type == NCRNA }

        #Coding sequences
        #----------------
        coding = seqs.select { |s| s.type == CODING }
        stats_hash['coding'] += coding.length

        #By Status
        stats_hash['coding_sure'] += coding.count { |s| s.status }
        stats_hash['coding_putative'] += coding.count { |s| !s.status }

        #By Length
        stats_hash['coding_>200'] += coding.count { |s| s.fasta_length > low_limit }
        stats_hash['coding_>500'] += coding.count { |s| s.fasta_length > upper_limit }

        return stats_hash, diff_ids_array, diff_ids_complete_array, all_seq_lengths
end
table_title(title) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 609
# Wraps +title+ (a String) in the bold, 25px <div> used as a table heading
# and returns the resulting HTML String.
def table_title(title)
        opening = '<div style="font-size:25px; margin: 10"><b>'
        closing = '</b></div>'
        opening + title + closing
end
write_mapping_report(fpkm, coverage_analysis, stats_functional_annotation_by_seqs) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 569
# Writes the HTML mapping report to fln_results/mapping_summary.html.
#
# fpkm                                - Collection handed to go_for_graph; the report is
#                                       skipped when it is empty.
# coverage_analysis                   - Hash whose values are indexable records; the report
#                                       is skipped when it is empty. Going by the table headers
#                                       used below, index 1 is the "mean_10max" value, index 2
#                                       the mean coverage and index 3 the transcript coverage
#                                       (index 0 is paired with index 3 under
#                                       :normalized_partial_coverage) - confirm with the producer.
# stats_functional_annotation_by_seqs - Passed untouched to go_for_graph (defined elsewhere).
#
# Returns nil without writing anything when either input is empty; otherwise
# returns whatever Report_html#write returns.
def write_mapping_report(fpkm, coverage_analysis, stats_functional_annotation_by_seqs)
        return if fpkm.empty? || coverage_analysis.empty?

        container = go_for_graph(stats_functional_annotation_by_seqs, fpkm)
        coverages = coverage_analysis.values

        # [rank, mean_10max, mean], ordered by descending mean
        measured_coverages = coverages.map { |c| [c[1], c[2]] }
        measured_coverages.sort! { |c1, c2| c2[1] <=> c1[1] }
        measured_coverages.each_with_index do |cov, i|
                cov.unshift(i + 1) # Puts x axis: 1, 2, 3 ... (seqs)
        end
        measured_coverages.unshift(%w[transcripts mean_10max mean])
        container[:mean_coverage] = measured_coverages

        # [rank, value at index 1], ordered by descending value at index 1
        count = 0
        container[:max10_coverage] = coverages.sort { |c1, c2| c2[1] <=> c1[1] }.map { |c| count += 1; [count, c[1]] }
        container[:normalized_partial_coverage] = coverages.map { |c| [c[3], c[0]] }

        # [trans_cov, mean_coverage], ordered by ascending trans_cov
        mean_cov_trasn_cov = coverages.map { |data| [data[3], data[2]] }
        mean_cov_trasn_cov.sort! { |i1, i2| i1[0] <=> i2[0] }
        mean_cov_trasn_cov.unshift(%w[trans_cov mean_coverage])
        container[:normalized_coverages_sorted_by_npc] = mean_cov_trasn_cov

        # File.read closes the template file; File.open(...).read left the handle open
        template = File.read(File.join(REPORT_FOLDER, 'mapping_summary.erb'))
        report = Report_html.new(container, 'FLN Summary')
        report.build(template)
        report.write(File.join('fln_results', 'mapping_summary.html'))
end
write_reptrans_stats(stats_hash, html_file, txt_file) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 593
# Writes the reptrans statistics as a text file and as an HTML report.
#
# stats_hash - Hash of counters (see initialize_stats_hash_reptrans).
# html_file  - Path handed to Report_html#write.
# txt_file   - Path of the text file; it is created or overwritten.
def write_reptrans_stats(stats_hash, html_file, txt_file)
        # The block form closes (and therefore flushes) the file once the stats are written
        File.open(txt_file, 'w') { |txt| write_txt(stats_hash, txt) }
        container = handle_data_reptrans_summary(stats_hash)
        template = File.read(File.join(REPORT_FOLDER, 'reptrans_summary.erb'))
        report = Report_html.new(container, 'FLN Reptrans Summary')
        report.build(template)
        report.write(html_file)
end
write_summary_stats(stats_hash, stats_taxonomy, stats_functional_annotation_by_seqs, diff_ids_array, diff_ids_complete_array, pre_fln_seq_lengths, seq_lengths, txt_file, html_file) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 559
# Finishes the main statistics and writes them as text and as an HTML report.
#
# stats_hash                          - Hash of counters; completed by last_stats.
# stats_taxonomy                      - Passed to handle_data_main_summary.
# stats_functional_annotation_by_seqs - Passed to handle_data_main_summary.
# diff_ids_array, diff_ids_complete_array, pre_fln_seq_lengths, seq_lengths
#                                     - Passed to last_stats.
# txt_file                            - Handed straight to write_txt, which calls +puts+ on it,
#                                       so it must be a writable IO-like object, not a path.
# html_file                           - Path handed to Report_html#write.
def write_summary_stats(stats_hash, stats_taxonomy, stats_functional_annotation_by_seqs, diff_ids_array, diff_ids_complete_array, pre_fln_seq_lengths, seq_lengths, txt_file, html_file)
        stats_hash = last_stats(stats_hash, diff_ids_array, diff_ids_complete_array, pre_fln_seq_lengths, seq_lengths)
        write_txt(stats_hash, txt_file)
        container = handle_data_main_summary(stats_hash, stats_taxonomy, stats_functional_annotation_by_seqs)
        # File.read closes the template file; File.open(...).read left the handle open
        template = File.read(File.join(REPORT_FOLDER, 'general_summary.erb'))
        report = Report_html.new(container, 'FLN Summary')
        report.build(template)
        report.write(html_file)
end
write_txt(stats_hash, file) click to toggle source
# File lib/full_lengther_next/fln_stats.rb, line 603
# Writes one "<value>\t<key>" line per entry of +stats_hash+ to +file+
# (any object responding to +puts+). Returns +stats_hash+.
def write_txt(stats_hash, file)
        stats_hash.each_pair { |name, amount| file.puts(amount.to_s + "\t" + name.to_s) }
end