class ROCker
@author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
@author Luis (Coto) Orellana
@license Artistic License 2.0
@update Sep-09-2015
Constants
- R_BIN
Attributes
Public Class Methods
CITATION(j = ' ')
# File lib/rocker.rb, line 38
def self.CITATION(j = ' ')
  @@CITATION.join(j)
end
DATE()
# File lib/rocker.rb, line 37
def self.DATE
  @@DATE
end
VERSION()
# File lib/rocker.rb, line 36
def self.VERSION
  @@VERSION
end
default(k)
# File lib/rocker.rb, line 35
def self.default(k)
  @@DEFAULTS[k]
end
defaults()
# File lib/rocker.rb, line 34
def self.defaults()
  @@DEFAULTS
end
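Taken together, these class-level getters expose the gem's metadata and default options. A minimal usage sketch (assuming the gem is installed and loaded as 'rocker'):

require "rocker"

puts ROCker.VERSION         # version string (@@VERSION)
puts ROCker.DATE            # release date (@@DATE)
puts ROCker.CITATION("\n")  # citation entries joined with newlines
opts  = ROCker.defaults     # Hash with all default options
quiet = ROCker.default(:q)  # a single default value by key (:q is used throughout)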
ebirest()
# File lib/rocker/step/build.rb, line 27
def self.ebirest()
  @@EBIREST
end
has_build_gems?()
# File lib/rocker/step/build.rb, line 28
def self.has_build_gems?
  return @@HAS_BUILD_GEMS unless @@HAS_BUILD_GEMS.nil?
  @@HAS_BUILD_GEMS = true
  begin
    require "rubygems"
    require "restclient"
  rescue LoadError
    @@HAS_BUILD_GEMS = false
  end
  @@HAS_BUILD_GEMS
end
new(opts)
# File lib/rocker.rb, line 42
def initialize(opts)
  @o = ROCker.defaults
  opts.each { |k,v| @o[k] = v }
  RInterface.R_BIN = opts[:r] unless opts[:r].nil?
end
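The constructor simply overlays the given options on ROCker.defaults and, when :r is given, points RInterface at that R binary. A construction sketch; the option keys are ones referenced by the methods below, and the values are purely illustrative:

require "rocker"

rocker = ROCker.new(
  positive: %w[P00000],   # hypothetical UniProt accession(s) for the positive set
  baseout:  "my_model",   # prefix for all output files (-o/--baseout)
  thr:      4,            # number of threads
  r:        "/usr/bin/R"  # optional path to the R binary (sets RInterface.R_BIN)
)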
Public Instance Methods
bash(cmd, err_msg=nil)
# File lib/rocker.rb, line 59
def bash(cmd, err_msg=nil)
  o = `#{cmd} 2>&1 && echo '{'`
  raise (err_msg.nil? ? "Error executing: #{cmd}\n\n#{o}" : err_msg) unless o[-2] == "{"
  true
end
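bash runs a shell command, returning true on success and raising (with the captured output, or with err_msg when given) on failure. For example:

rocker = ROCker.new({})  # all defaults
rocker.bash("ls -1 > /dev/null", "Cannot list the working directory.")  # => true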
blast2table(blast_f, table_f, aln, minscore)
build!()
[ Build ]
# File lib/rocker/step/build.rb, line 109
def build!
  # Check requirements
  puts "Testing environment." unless @o[:q]
  { searchcmd: :search, makedbcmd: :search, alignercmd: :aligner,
    alignerbin: :aligner, simulatorcmd: :simulator, simulatorbin: :simulator
  }.each_pair { |k,v| @o[k] = @o[k][@o[v]] if @o[k].is_a? Hash }
  @o[:nosearch] = true if @o[:nosimulate]
  raise "Unsatisfied requirements, please see the help message (-h)." unless ROCker.has_build_gems?
  protein_set = {}
  protein_set[:+] = ProteinSet.new(self, @o[:positive], @o[:posfile], @o[:aln])
  protein_set[:-] = ProteinSet.new(self, @o[:negative], @o[:negfile])
  raise "-p, -P, or -a are mandatory." if protein_set[:+].empty?
  raise "-o/--baseout is mandatory." if @o[:baseout].nil?
  if protein_set[:+].size == 1 and not @o[:noaln]
    warn "\nWARNING: Positive set contains only one sequence, turning " +
      "off alignment.\n\n"
    @o[:noaln] = true
  end
  unless @o[:nosimulate]
    self.bash("#{@o[:simulatorbin]} --version",
      "--simulator-bin must be executable. Is Grinder installed?") if @o[:simulator] == :grinder
  end
  unless @o[:noaln]
    self.bash("#{@o[:alignerbin]} -version",
      "--aligner-bin must be executable. Is Muscle installed?") if @o[:aligner] == :muscle
    self.bash("#{@o[:alignerbin]} --version",
      "--aligner-bin must be executable. Is ClustalOmega installed?") if @o[:aligner] == :clustalo
  end
  unless @o[:nosearch]
    self.bash("#{@o[:searchbins]}makeblastdb -version",
      "--search-bins must contain executables. Is BLAST+ installed?") if @o[:search] == :blast
    self.bash("#{@o[:searchbins]}diamond --help",
      "--search-bins must contain executables. Is DIAMOND installed?") if @o[:search] == :diamond
  end

  # Download genes
  puts "Downloading gene data." unless @o[:q]
  ref_file = @o[:baseout] + ".ref.fasta"
  if not protein_set[:+].aln.nil?
    puts " * reusing aligned sequences as positive set." unless @o[:q]
    protein_set[:+].get_from_aln(ref_file, protein_set[:+].aln)
    @o[:noaln] = true
  elsif @o[:reuse] and File.size? ref_file
    puts " * reusing positive set: #{ref_file}." unless @o[:q]
  else
    puts " * downloading #{protein_set[:+].size} sequence(s) in " +
      "positive set." unless @o[:q]
    $stderr.puts " # #{protein_set[:+].ids}" if @o[:debug]
    protein_set[:+].download(ref_file)
  end
  [:+, :-].each do |set|
    unless protein_set[set].empty?
      puts " * linking genomes from #{protein_set[set].size} " +
        "[#{set.to_s}] sequence(s)." unless @o[:q]
      $stderr.puts " # #{protein_set[set].ids}" if @o[:debug]
      protein_set[set].get_genomes!
    end
  end
  raise "No genomes associated with the positive set." if protein_set[:+].genomes.empty?
  genome_set = {:+ => GenomeSet.new(self, protein_set[:+].genomes),
                :- => GenomeSet.new(self, protein_set[:-].genomes)}

  # Locate genes
  puts "Analyzing genome data." unless @o[:q]
  coords_file = @o[:baseout] + ".src.coords"
  if @o[:reuse] and File.size? coords_file
    puts " * reusing coordinates: #{coords_file}." unless @o[:q]
    c = JSON.parse File.read(coords_file), {symbolize_names: true}
    positive_coords = c[:positive_coords]
    negative_coords = c[:negative_coords]
    genome_set[:+].taxa = c[:taxa_pos]
    genome_set[:-].taxa = c[:taxa_neg]
  else
    all_coords = {}
    [:+, :-].each do |set_type|
      all_coords[set_type] = {}
      next if genome_set[set_type].empty?
      thrs = [@o[:thr], genome_set[set_type].size].min
      puts " * downloading and parsing #{genome_set[set_type].size} " +
        "GFF3 document(s) in #{thrs} threads." unless @o[:q]
      $stderr.puts " # Looking for translations: " +
        "#{protein_set[set_type].tranids_dump}" if @o[:debug]
      $stderr.puts " # Looking into: #{genome_set[set_type].ids}" if @o[:debug]
      # Launch threads
      thr_obj = []
      (0 .. (thrs-1)).each do |thr_i|
        ids_to_parse = []
        (0 .. (genome_set[set_type].size-1)).each do |i|
          ids_to_parse << protein_set[set_type].genomes[i] if (i % thrs) == thr_i
        end
        json_file = @o[:baseout] + ".src.coords." + thr_i.to_s + ".tmp"
        thr_obj << json_file
        fork do
          get_coords_from_gff3(ids_to_parse, protein_set[set_type], thr_i, json_file)
        end
      end
      # Combine results
      Process.waitall
      thr_obj.each do |t|
        raise "Thread failed without error trace: #{t}" unless File.exist? t
        o = JSON.parse(File.read(t), {symbolize_names: true})
        o[:coords].each_pair do |k,v|
          all_coords[set_type][k] ||= []
          all_coords[set_type][k] += v
        end
        File.unlink t
      end
    end # [:+, :-].each
    positive_coords = all_coords[:+]
    negative_coords = all_coords[:-]
    # Select one genome per taxon
    unless @o[:pertaxon].nil?
      puts " Selecting genomes by #{@o[:pertaxon]}." unless @o[:q]
      [:+, :-].each { |set| genome_set[set].choose_genomes! @o[:pertaxon] }
    end
    # Save coordinates and taxa
    ofh = File.open(coords_file, "w")
    ofh.print JSON.pretty_generate({positive_coords: positive_coords,
      negative_coords: negative_coords, taxa_pos: genome_set[:+].taxa,
      taxa_neg: genome_set[:-].taxa})
    ofh.close
  end # if @o[:reuse] and File.size? coords_file ... else
  unless @o[:pertaxon].nil?
    puts " Using " + [:+, :-].map { |set| genome_set[set].size }.reduce(:+).to_s +
      " genome(s) after filtering by #{@o[:pertaxon]}." unless @o[:q]
  end
  found = protein_set[:+].in_coords(positive_coords)
  raise "Cannot find the genomic location of any provided sequence." if found.nil?
  missing = protein_set[:+].ids - found
  unless missing.empty?
    warn "\nWARNING: Cannot find genomic location of #{missing.size} " +
      "sequence(s) #{missing.join(",")}.\n\n"
    unless @o[:keep_unlinked]
      del_genomes = protein_set[:+].remove_genomes_by_prot_id!(missing)
      warn "WARNING: Ignoring #{del_genomes.size} genomes with missing " +
        "coordinates #{del_genomes.join(",")}.\n\n"
      genome_set[:+].delete!(del_genomes)
    end
  end

  # Download genomes
  genome_set[:all] = GenomeSet.new(self, genome_set[:+].ids + genome_set[:-].ids)
  genomes_file = @o[:baseout] + ".src.fasta"
  if @o[:reuse] and File.size? genomes_file
    puts " * reusing existing file: #{genomes_file}." unless @o[:q]
  else
    raise "Something went wrong: Nothing left to download." if genome_set[:all].empty?
    puts " * downloading " + genome_set[:all].size.to_s +
      " genome(s) in FastA." unless @o[:q]
    $stderr.puts " # #{genome_set[:all].ids}" if @o[:debug]
    genome_set[:all].download genomes_file
  end

  # Generate metagenome
  unless @o[:nosimulate]
    puts "Generating in silico metagenome" unless @o[:q]
    if @o[:reuse] and File.size? @o[:baseout] + ".mg.fasta"
      puts " * reusing existing file: #{@o[:baseout]}.mg.fasta." unless @o[:q]
    else
      all_src = File.readlines("#{@o[:baseout]}.src.fasta").select{ |l| l =~ /^>/ }.size
      thrs = [@o[:thr], all_src].min
      thr_obj = []
      seqs_per_thr = (all_src.to_f/thrs).ceil
      thrs = (all_src.to_f/seqs_per_thr).ceil
      puts " * simulating metagenomes and tagging positive reads in " +
        thrs.to_s + " threads." unless @o[:q]
      $stderr.puts " # #{positive_coords}" if @o[:debug]
      (0 .. (thrs-1)).each do |thr_i|
        output = @o[:baseout] + ".mg.fasta.#{thr_i.to_s}"
        thr_obj << output
        fork do
          seqs_a = thr_i*seqs_per_thr + 1
          seqs_b = [seqs_a + seqs_per_thr - 1, all_src].min
          # Create sub-fasta
          ofh = File.open("#{@o[:baseout]}.src.fasta.#{thr_i.to_s}", "w")
          ifh = File.open("#{@o[:baseout]}.src.fasta", "r")
          seq_i = 0
          while l = ifh.gets
            seq_i += 1 if l =~ /^>/
            break if seq_i > seqs_b
            ofh.print l if seq_i >= seqs_a
          end
          ifh.close
          ofh.close
          # Run simulator (except if the temporal file is already
          # there and can be reused)
          ofile = "#{@o[:baseout]}.mg.tmp.#{thr_i.to_s}"
          bash sprintf(@o[:simulatorcmd], @o[:simulatorbin],
            "#{@o[:baseout]}.src.fasta.#{thr_i.to_s}",
            @o[:seqdepth]*@o[:readlen].to_f, @o[:readlen],
            File.basename(ofile), File.dirname(ofile)
          ) unless @o[:reuse] and File.size? @o[:baseout] + ".mg.tmp.#{thr_i.to_s}-reads.fa"
          # Tag positive and negative reads
          puts " * tagging reads [thread #{thr_i}]." unless @o[:q]
          ifh = File.open(@o[:baseout] + ".mg.tmp.#{thr_i}-reads.fa", "r")
          ofh = File.open(@o[:baseout] + ".mg.fasta.#{thr_i}", "w")
          while l = ifh.gets
            if l =~ /^>/
              rd = %r{
                ^>(?<id>\d+)\s
                reference=[A-Za-z_]+[\|:]
                (?<genome_id>[A-Za-z0-9_]+)(?:\|.*)?\s
                position=(?<comp>complement\()?(?<from>\d+)\.\.
                (?<to>\d+)\)?\s
              }x.match(l)
              raise "Cannot parse simulated read's defline, are " +
                "you using Grinder?: #{l}" if rd.nil?
              positive = false
              positive_coords[rd[:genome_id].to_sym] ||= []
              positive_coords[rd[:genome_id].to_sym].each do |gn|
                left  = rd[:to].to_i - gn[:from]
                right = gn[:to] - rd[:from].to_i
                if (left*right >= 0) and ([left, right].min >= @o[:minovl])
                  positive = true
                  break
                end
              end
              negative = false
              negative_coords[rd[:genome_id].to_sym] ||= []
              negative_coords[rd[:genome_id].to_sym].each do |gn|
                left  = rd[:to].to_i - gn[:from]
                right = gn[:to] - rd[:from].to_i
                if (left*right >= 0) and ([left, right].min >= @o[:minovl])
                  negative = true
                  break
                end
              end
              l = ">#{thr_i.to_s}_#{rd[:id]}" +
                "#{positive ? "@%" : (negative ? "@$" : "")} " +
                "ref=#{rd[:genome_id]}:#{rd[:from]}..#{rd[:to]}" +
                "#{(rd[:comp]=="complement(") ? "-" : "+"}\n"
            end
            ofh.print l
          end
          ofh.close
          ifh.close
        end # fork
      end # (1 .. thrs).each
      Process.waitall
      # Concatenate results
      ofh = File.open(@o[:baseout] + ".mg.fasta", "w")
      thr_obj.each do |t|
        raise "Thread failed without error trace: #{t}" unless File.exist? t
        ifh = File.open(t, "r")
        while l = ifh.gets
          ofh.print l
        end
        ifh.close
        File.unlink t
      end
      ofh.close
    end
  end # unless @o[:nosimulate]

  # Align references
  unless @o[:noaln]
    puts "Aligning reference set." unless @o[:q]
    if @o[:reuse] and File.size? "#{@o[:baseout]}.ref.aln"
      puts " * reusing existing file: #{@o[:baseout]}.ref.aln." unless @o[:q]
    else
      bash(sprintf(@o[:alignercmd], @o[:alignerbin],
        "#{@o[:baseout]}.ref.fasta", "#{@o[:baseout]}.ref.aln", @o[:thr]))
      puts " +--\n | IMPORTANT NOTE: Manually checking the alignment " +
        "before\n | the 'compile' step is *strongly* encouraged.\n " +
        "+--\n" unless @o[:q]
    end
  end

  # Run similarity search
  unless @o[:nosearch]
    puts "Running similarity search." unless @o[:q]
    if @o[:reuse] and File.size? "#{@o[:baseout]}.ref.blast"
      puts " * reusing existing file: #{@o[:baseout]}.ref.blast." unless @o[:q]
    else
      puts " * preparing database." unless @o[:q]
      bash(sprintf(@o[:makedbcmd], @o[:searchbins], "prot",
        "#{@o[:baseout]}.ref.fasta", "#{@o[:baseout]}.ref"))
      puts " * running similarity search." unless @o[:q]
      bash(sprintf(@o[:searchcmd], @o[:searchbins], "blastx",
        "#{@o[:baseout]}.mg.fasta", "#{@o[:baseout]}.ref",
        "#{@o[:baseout]}.ref.blast", @o[:thr]))
    end
  end

  # Clean
  unless @o[:noclean]
    puts "Cleaning." unless @o[:q]
    sff  = %w{.src.xml .src.fasta}
    sff += %w{.mg.tmp-reads.fa .mg.tmp-ranks.txt} unless @o[:nosimulate]
    sff += %w{.ref.phr .ref.pin .ref.psq} unless @o[:nosearch]
    sff.each { |sf| File.unlink @o[:baseout] + sf if File.exist? @o[:baseout] + sf }
  end
end
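In practice build! is driven entirely by the options passed to ::new; a minimal sketch using option keys checked above (accessions and paths are illustrative):

require "rocker"

ROCker.new(
  positive: %w[P00000 P00001],  # -p: positive-set protein accessions (hypothetical)
  baseout:  "my_model",         # -o: output prefix (my_model.ref.fasta, my_model.mg.fasta, ...)
  thr:      4                   # threads for GFF3 parsing and read simulation
).build!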
compile!()
[ Compile ]
# File lib/rocker/step/compile.rb, line 13
def compile!
  raise "-a/--alignment is mandatory." if @o[:aln].nil?
  raise "-a/--alignment must exist." unless File.exist? @o[:aln]
  raise "-l/--readlen is mandatory." if @o[:readlen].nil?
  if @o[:table].nil?
    raise "-T/--table is mandatory unless -b is provided." if @o[:blast].nil? or
      not File.exist? @o[:blast]
    @o[:table] = "#{@o[:blast]}.table"
  else
    @o[:reuse] = true
  end
  raise "-b/--blast is mandatory unless -t exists." if @o[:blast].nil? and
    not File.exist? @o[:table]
  raise "-k/--rocker is mandatory." if @o[:rocker].nil?

  puts "Testing environment." unless @o[:q]
  bash("echo '' | #{@o[:r]} --vanilla",
    "-r/--path-to-r must be executable. Is R installed?")
  bash("echo \"library('pROC')\" | #{@o[:r]} --vanilla",
    "Please install the 'pROC' library for R first.")

  puts "Reading files." unless @o[:q]
  puts " * loading alignment: #{@o[:aln]}." unless @o[:q]
  aln = Alignment.new
  aln.read_fasta @o[:aln]
  if @o[:reuse] and File.exist? @o[:table]
    puts " * reusing existing file: #{@o[:table]}." unless @o[:q]
  else
    puts " * generating table: #{@o[:table]}." unless @o[:q]
    blast2table(@o[:blast], @o[:table], aln, @o[:minscore])
  end

  puts "Analyzing data." unless @o[:q]
  puts " * computing windows." unless @o[:q]
  data = ROCData.new(@o[:table], aln, @o[:win])
  data.nucl = @o[:nucl]
  if @o[:refine]
    puts " * refining windows." unless @o[:q]
    warn "Insufficient hits to refine results." unless data.refine! @o[:table]
  end
  puts " * saving ROCker file: #{@o[:rocker]}." unless @o[:q]
  data.save(@o[:rocker], l: @o[:readlen])
end
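compile! turns the tagged similarity search from the build step into a ROCker model; a usage sketch with the option names validated above (paths illustrative):

ROCker.new(
  aln:     "my_model.ref.aln",    # -a: reference alignment (check it manually first)
  blast:   "my_model.ref.blast",  # -b: tabular search of simulated reads vs. references
  rocker:  "my_model.rocker",     # -k: output model file
  readlen: 100                    # -l: simulated read length, stored as signature :l
).compile!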
correct_bs(bh, readlen, exp_readlen, max_corr, penalty)
# File lib/rocker/step/filter.rb, line 60
def correct_bs(bh, readlen, exp_readlen, max_corr, penalty)
  bs = bh.bits
  return bs if @o[:lencorr].nil? or readlen.nil? or readlen >= exp_readlen
  bits_per_aa = bs.to_f / readlen
  if penalty.nil? or penalty > 1.0
    extra = [0.0, readlen * (max_corr + 1.0) - exp_readlen].max
    max_tri = max_corr * readlen * bits_per_aa / 2
    tanTheta = max_corr > 0.0 ? bits_per_aa / (max_corr * readlen) : 0.0
    extra_tri = extra * extra * tanTheta / 2
    return bs + (max_tri - extra_tri)
  else
    miss = [exp_readlen - readlen, max_corr * readlen].min
    return bs + (bits_per_aa * miss * (1.0 - penalty))
  end
end
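As a worked example of the linear-penalty branch: an 80-bit hit on a 60 aa read, with an expected read length of 100 aa, max_corr = 0.4 and penalty = 0.5, gives bits_per_aa = 80/60 ≈ 1.33 and miss = min(100-60, 0.4*60) = 24, so the corrected score is 80 + 1.33 * 24 * 0.5 ≈ 96 bits. A standalone re-derivation of just that branch (hypothetical helper, for illustration only):

# Hypothetical re-implementation of the penalty branch of correct_bs, not the method itself.
def corrected_bits(bits, readlen, exp_readlen, max_corr, penalty)
  return bits if readlen >= exp_readlen
  bits_per_aa = bits.to_f / readlen
  miss = [exp_readlen - readlen, max_corr * readlen].min
  bits + bits_per_aa * miss * (1.0 - penalty)
end

corrected_bits(80, 60, 100, 0.4, 0.5) # => 96.0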
ebiFetch(db, ids, format, outfile=nil)
# File lib/rocker/step/build.rb, line 54
def ebiFetch(db, ids, format, outfile=nil)
  url = "#{ROCker.ebirest}/dbfetch/dbfetch/" +
    "#{db.to_s}/#{ids.join(",")}/#{format.to_s}"
  $stderr.puts url
  self.restcall url, outfile
end
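ebiFetch only assembles a dbfetch URL and delegates to restcall; for instance (database, accession, and format are illustrative and must be valid dbfetch values):

rocker = ROCker.new({})
ROCker.has_build_gems? or abort "Please install the rest-client gem first."
rocker.ebiFetch(:uniprotkb, ["P00000"], :fasta, "P00000.fasta")
# GETs <EBI REST>/dbfetch/dbfetch/uniprotkb/P00000/fasta and also writes it to P00000.fasta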
filter!(data=nil)
[ Filter ]
# File lib/rocker/step/filter.rb, line 13
def filter!(data=nil)
  raise "-k/--rocker is mandatory." if @o[:rocker].nil?
  raise "-x/--query-blast is mandatory." if @o[:qblast].nil?
  raise "-o/--out-blast is mandatory." if @o[:oblast].nil?

  # Read ROCker file
  if data.nil?
    puts "Loading ROCker file: #{@o[:rocker]}." unless @o[:q]
    data = ROCData.new @o[:rocker]
  end

  readlengths = {}
  exp_readlen = 0
  unless @o[:lencorr].nil?
    @o[:lencorr_max] ||= 0.4
    raise "Unsigned length in model, please re-compile model to use -L" if data.signatures[:l].nil?
    exp_readlen = data.signatures[:l].to_i
    File.open(@o[:lencorr], 'r') do |fh|
      k = nil
      fh.each_line do |ln|
        if ln =~ /^>(\S+)/
          k = $1
          readlengths[k] = 0
        else
          readlengths[k] += ln.chomp.size
        end
      end
    end
  end

  # Filter similarity search
  puts "Filtering similarity search: #{@o[:qblast]}." unless @o[:q]
  oh = File.open(@o[:oblast], 'w')
  File.open(@o[:qblast], 'r') do |ih|
    ih.each_line do |ln|
      bh = BlastHit.new(ln, data.aln)
      next if bh.sbj.nil? # <- When the hit is not against a known target
      bs = @o[:lencorr].nil? ? bh.bits :
        correct_bs(bh, readlengths[bh.qry], exp_readlen,
          @o[:lencorr_max], @o[:lencorr_pen])
      oh.print ln if not(bh.sfrom.nil?) and bs >= data.win_at_col(bh.midpoint).thr
    end
  end
  oh.close
end
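filter! applies the per-window bit-score thresholds of a compiled model to a tabular search of real reads; a usage sketch with the options checked above (paths illustrative):

ROCker.new(
  rocker: "my_model.rocker",       # -k: compiled model
  qblast: "reads_vs_ref.blast",    # -x: tabular similarity search to filter
  oblast: "reads_vs_ref.filtered"  # -o: hits passing their window's threshold
).filter!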
get_coords_from_gff3(genome_ids, pset, thread_id, json_file)
# File lib/rocker/step/build.rb, line 60
def get_coords_from_gff3(genome_ids, pset, thread_id, json_file)
  coords = {}
  i = 0
  genome_ids.each do |genome_id|
    print " * scanning #{(i+=1).ordinalize} genome out of " +
      "#{genome_ids.size} in first thread. \r" if thread_id==0 and not @o[:q]
    genome_file = @o[:baseout] + ".src." + genome_id + ".gff3"
    if @o[:reuse] and File.size? genome_file
      ifh = File.open(genome_file, "r")
      doc = ifh.readlines.grep(/^[^#]/)
      ifh.close
    else
      genome_file = nil unless @o[:noclean]
      doc = ebiFetch(:embl, [genome_id], :gff3, genome_file).split("\n").grep(/^[^#]/)
      if doc.first =~ /ERROR 12 No entries found/
        doc = ebiFetch(:emblconexp, [genome_id], :gff3, genome_file).split("\n").grep(/^[^#]/)
      end
    end
    doc.each do |ln|
      next if ln =~ /^#/
      r = ln.chomp.split /\t/
      next if r.size < 9
      prots = r[8].split(/;/).grep(
        /^db_xref=UniProtKB[\/A-Za-z-]*:/){ |xref| xref.split(/:/)[1] }
      p = prots.compact.select{ |id| pset.ids.include? id }.first
      trans = r[8].split(/;/).grep(/^protein_id=/){ |pid| pid.split(/=/)[1] }
      t = trans.select{ |id| pset.tranids.include? id }.first
      next if p.nil? and t.nil?
      coords[ r[0].to_sym ] ||= []
      coords[ r[0].to_sym ] << {
        prot_id: p, tran_id: t,
        from: r[3].to_i, to: r[4].to_i, strand: r[6]
      }
    end
  end
  print "\n" if thread_id==0 and not @o[:q]
  ofh = File.open(json_file, "w")
  ofh.print({coords: coords}.to_json)
  ofh.close
end
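Only GFF3 feature lines whose attribute column carries a db_xref=UniProtKB...: entry matching a protein in the set, or a protein_id= matching one of its translation IDs, are recorded; columns 1, 4, 5, and 7 provide the sequence ID, start, end, and strand. A hypothetical tab-separated line it would accept (accessions invented):

U00000.1  EMBL  CDS  1234  2345  .  +  0  ID=cds1;protein_id=XXX00000.1;db_xref=UniProtKB/Swiss-Prot:P00000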
plot!()
[ Plot ]
# File lib/rocker/step/plot.rb, line 16
def plot!
  raise "-k/--rocker is mandatory." if @o[:rocker].nil?
  if @o[:table].nil?
    raise "-t/--table is mandatory unless -b is provided." if @o[:blast].nil?
    @o[:table] = "#{@o[:blast]}.table"
  end
  raise "-b/--blast is mandatory unless -t exists." if @o[:blast].nil? and
    not File.exist? @o[:table]

  puts "Testing environment." unless @o[:q]
  bash "echo '' | #{@o[:r]} --vanilla", "-r/--path-to-r must be " +
    "executable. Is R installed?"

  # Source files
  puts "Reading files." unless @o[:q]
  puts " * loading ROCker file: #{@o[:rocker]}." unless @o[:q]
  data = ROCData.new @o[:rocker]
  if File.exist? @o[:table]
    puts " * reusing existing file: #{@o[:table]}." unless @o[:q]
  else
    puts " * generating table: #{@o[:table]}." unless @o[:q]
    blast2table(@o[:blast], @o[:table], data.aln, @o[:minscore])
  end

  # Matches (middle panel)
  puts "Plotting matches." unless @o[:q]
  extra = @o[:gformat]=="pdf" ? "" : ", units='in', res=300"
  @o[:gout] ||= "#{@o[:rocker]}.#{@o[:gformat]}"
  data.rrun "#{@o[:gformat]}('#{@o[:gout]}', #{@o[:width]}, " +
    "#{@o[:height]}#{extra});"
  data.rrun "layout(c(2,1,3), heights=c(2-1/#{data.aln.size},3,1));"
  some_thr = data.load_table! @o[:table], @o[:sbj], @o[:minscore]
  data.rrun "par(mar=c(0,4,0,0.5)+.1);"
  data.rrun "plot(1, t='n', xlim=c(0.5,#{data.aln.cols}+0.5), " +
    "ylim=range(x$V4)+c(-0.04,0.04)*diff(range(x$V4)), xlab='', " +
    "ylab='Bit score', xaxs='i', xaxt='n');"
  data.rrun "noise <- runif(ncol(x),-.2,.2)"
  data.rrun "hit.col <- ifelse(x$V5==1, " +
    "rgb(0,0,.5,#{@o[:transparency] ? ".2" : "1"}), " +
    "rgb(.5,0,0,#{@o[:transparency] ? ".2" : "1"}))"
  data.rrun "hit.col[ x$V5==-1 ] <- " +
    "rgb(0.722,0.722,0,#{@o[:transparency] ? ".2" : "1"})" if @o[:tag_negatives]
  data.rrun "arrows(x0=x$V2, x1=x$V3, y0=x$V4+noise, lty=1, col=hit.col, " +
    "length=0);"
  data.rrun "points(x$V6, x$V4+noise, col=hit.col, pch=19, cex=1/4);"

  # Windows (middle panel)
  puts "Plotting windows." unless @o[:q]
  if some_thr
    data.rrun "arrows(x0=w$V1, x1=w$V2, y0=w$V5, lwd=2, length=0)"
    data.rrun "arrows(x0=w$V2[-nrow(w)], x1=w$V1[-1], " +
      "y0=w$V5[-nrow(w)], y1=w$V5[-1], lwd=2, length=0)"
  end
  data.rrun "legend('bottomright', legend=c('Match span'," +
    "'Match mid-point','Reference (+)'," +
    "#{"'Reference (-)'," if @o[:tag_negatives]}'Non-reference'), " +
    "lwd=c(1,NA,1,1,1), pch=c(NA,19,19,19,19), ncol=5, bty='n', " +
    "col=c('black','black','darkblue'," +
    "#{"rgb(.722,.722,0)," if @o[:tag_negatives]}'darkred'))"

  # Alignment (top panel)
  puts "Plotting alignment." unless @o[:q]
  data.rrun "par(mar=c(0,4,0.5,0.5)+0.1);"
  data.rrun "plot(1, t='n', xlim=c(0,#{data.aln.cols}), " +
    "ylim=c(1,#{data.aln.seqs.size}), xlab='', ylab='Alignment', " +
    "xaxs='i', xaxt='n', yaxs='i', yaxt='n', bty='n');"
  i = 0
  data.rrun "clr <- rainbow(26, v=1/2, s=3/4);" if @o[:color]
  data.aln.seqs.values.each do |s|
    color = (s.aln.split(//).map do |c|
      c=="-" ? "'grey80'" :
        (@o[:sbj].include?(s.id) ? "'red'" :
          (@o[:color] ? "clr[#{c.ord-64}]" : "'black'"))
    end.join(","))
    data.rrun "rect((1:#{data.aln.cols-1})-0.5, " +
      "rep(#{i}, #{data.aln.cols-1}), (1:#{data.aln.cols-1})+0.5, " +
      "rep(#{i+1}, #{data.aln.cols-1}), col=c(#{color}), border=NA);"
    i += 1
  end

  # Statistics (bottom panel)
  puts "Plotting statistics." unless @o[:q]
  data.rrun "par(mar=c(5,4,0,0.5)+.1);"
  data.rrun "plot(1, t='n', xlim=c(0,#{data.aln.cols}), " +
    "ylim=c(#{@o[:ylim].nil? ? (@o[:impact] ? "-2,.1" : "50,100") : @o[:ylim]}), " +
    "xlab='Alignment position (amino acids)', " +
    "ylab='Precision',xaxs='i');"
  if some_thr
    sn = data.rrun "100*sum(w$tp)/(sum(w$tp)+sum(w$fn))", :float
    sp = data.rrun "100*sum(w$tn)/(sum(w$fp)+sum(w$tn))", :float
    ac = data.rrun "100*(sum(w$tp)+sum(w$tn))/(sum(w$p)+sum(w$n))", :float
    unless @o[:q]
      puts " * sensitivity: #{sn}%"
      puts " * specificity: #{sp}%"
      puts " * accuracy: #{ac}%"
    end
    data.rrun "pos <- (w$V1+w$V2)/2"
    if @o[:impact]
      data.rrun "lines(pos[!is.na(w$specificity)], " +
        "(w$specificity[!is.na(w$specificity)]-#{sp})*" +
        "w$tp[!is.na(w$specificity)]/sum(w$tp), " +
        "col='darkred', lwd=2, t='o', cex=1/3, pch=19);"
      data.rrun "lines(pos[!is.na(w$sensitivity)], " +
        "(w$sensitivity[!is.na(w$sensitivity)]-#{sn})*" +
        "w$tn[!is.na(w$sensitivity)]/sum(w$tn), " +
        "col='darkgreen', lwd=2, t='o', cex=1/3, pch=19);"
      data.rrun "lines(pos[!is.na(w$accuracy)], " +
        "(w$accuracy[!is.na(w$accuracy)]-#{ac})*" +
        "(w$tp+w$tn)[!is.na(w$accuracy)]/sum(c(w$tp, w$tn)), " +
        "col='darkblue', lwd=2, t='o', cex=1/3, pch=19);"
    else
      data.rrun "lines(pos[!is.na(w$specificity)], " +
        "w$specificity[!is.na(w$specificity)], col='darkred', " +
        "lwd=2, t='o', cex=1/3, pch=19);"
      data.rrun "lines(pos[!is.na(w$sensitivity)], " +
        "w$sensitivity[!is.na(w$sensitivity)], col='darkgreen', " +
        "lwd=2, t='o', cex=1/3, pch=19);"
      data.rrun "lines(pos[!is.na(w$accuracy)], " +
        "w$accuracy[!is.na(w$accuracy)], col='darkblue', lwd=2, " +
        "t='o', cex=1/3, pch=19);"
    end
  end
  data.rrun "legend('bottomright', " +
    "legend=c('Specificity','Sensitivity','Accuracy'), lwd=2, " +
    "col=c('darkred','darkgreen','darkblue'), ncol=3, bty='n')"
  data.rrun "dev.off();"
end
restcall(url, outfile=nil)
[ Utilities ]
# File lib/rocker/step/build.rb, line 41
def restcall(url, outfile=nil)
  $stderr.puts " # Calling: #{url}" if @o[:debug]
  response = RestClient::Request.execute(method: :get, url: url, timeout: 600)
  raise "Unable to reach EBI REST client, error code " +
    response.code.to_s + "." unless response.code == 200
  unless outfile.nil?
    ohf = File.open(outfile, "w")
    ohf.print response.to_s
    ohf.close
  end
  response.to_s
end
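restcall is a thin wrapper around RestClient::Request with a 600-second timeout; a quick sketch (URL illustrative):

rocker = ROCker.new(debug: true)
ROCker.has_build_gems? or abort "Please install the rest-client gem first."
body = rocker.restcall("#{ROCker.ebirest}/dbfetch/dbfetch/uniprotkb/P00000/fasta")
# => response body as a String; also written to a file when outfile is given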
search!()
[ Search ]
# File lib/rocker/step/search.rb, line 15
def search!
  raise "-k/--rocker is mandatory." if @o[:rocker].nil?
  raise "-i/--query is mandatory." if @o[:query].nil?
  @o[:lencorr] = @o[:query] unless @o[:lencorr].nil?

  # Check requirements
  puts "Testing environment." unless @o[:q]
  @o[:searchcmd] = @o[:searchcmd][@o[:search]] if @o[:searchcmd].is_a? Hash
  @o[:makedbcmd] = @o[:makedbcmd][@o[:search]] if @o[:makedbcmd].is_a? Hash
  self.bash "#{@o[:searchbins]}makeblastdb -version",
    "--search-bins must contain executables. Is BLAST+ installed?" if @o[:search]==:blast
  self.bash "#{@o[:searchbins]}diamond --help",
    "--search-bins must contain executables. Is DIAMOND installed?" if @o[:search]==:diamond

  # Run similarity search
  Dir.mktmpdir do |dir|
    @o[:qblast] ||= "#{dir}/blast"
    puts "Loading ROCker file: #{@o[:rocker]}." unless @o[:q]
    data = ROCData.new @o[:rocker]
    puts "Running similarity search." unless @o[:q]
    puts " * preparing database." unless @o[:q]
    ofh = File.new("#{dir}/ref.fasta", "w")
    ofh.print data.aln.to_seq_s
    ofh.close
    bash sprintf(@o[:makedbcmd], @o[:searchbins], 'prot',
      "#{dir}/ref.fasta", "#{dir}/ref")
    puts " * running similarity search." unless @o[:q]
    bash sprintf(@o[:searchcmd], @o[:searchbins], 'blastx',
      @o[:query], "#{dir}/ref", @o[:qblast], @o[:thr])
    self.filter! data
  end
end
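search! ties the pipeline together for new data: it builds a temporary database from the model's reference alignment, runs the similarity search, and hands the result to filter!. A usage sketch (option keys from the checks above and from filter!, paths illustrative):

ROCker.new(
  rocker: "my_model.rocker",      # -k: compiled model
  query:  "metagenome.fasta",     # -i: reads to classify
  oblast: "metagenome.filtered",  # -o: filtered hits (required by filter!)
  thr:    4
).search!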