module CoutinhoAssembly::Runners::Megahit
Public Instance Methods
clean_up_out_dir(zip_binary: nil, assembly_dir: nil, num_threads: nil)
click to toggle source
Removes the intermediate contigs and zips the final contigs. This is meant to be run on a completed assembly out dir.
# File lib/coutinho_assembly.rb, line 101
#
# Removes the "intermediate_contigs" directory from an assembly output
# directory and compresses the final contigs file(s) matching
# "*.contigs.fa". This is meant to be run on a completed assembly out dir.
#
# zip_binary::   Compression program to run, given as a bare name or a path
#                (e.g. "gzip", "pigz", "/usr/bin/pigz").
# assembly_dir:: The assembly output directory to clean up.
# num_threads::  Thread count handed to pigz through its -p option. It is
#                ignored for any other program, and omitted when nil.
#
# Returns whatever Process.run_it returns for the compression command.
def clean_up_out_dir(zip_binary: nil, assembly_dir: nil, num_threads: nil)
  int_contig_dir = File.join assembly_dir, "intermediate_contigs"

  # Remove the intermediate contigs
  FileUtils.rm_r int_contig_dir if Dir.exist? int_contig_dir

  # NOTE(review): the wildcard is left in the command string, so this
  # presumably relies on Process.run_it running the command through a
  # shell that expands it -- confirm.
  contig_glob = File.join assembly_dir, "*.contigs.fa"

  # Recognise pigz by program name rather than by exact string so that a
  # full path to pigz still receives the thread limit. Only emit -p when a
  # thread count was actually given, otherwise the glob would be consumed
  # as the argument of -p.
  limit_pigz_threads = num_threads && File.basename(zip_binary.to_s) == "pigz"

  cmd = if limit_pigz_threads
          "#{zip_binary} -p #{num_threads} #{contig_glob}"
        else
          "#{zip_binary} #{contig_glob}"
        end

  # Zip the contigs file
  Process.run_it cmd
end
log_diagnostic_files(assembly_dir, assembly_prefix)
click to toggle source
# File lib/coutinho_assembly.rb, line 20
#
# Writes the contents of the assembler's diagnostic files to the
# Rya::AbortIf logger at error level. The files looked for are
# "opts.txt" and "<assembly_prefix>.log" inside assembly_dir; any file
# that does not exist is silently skipped.
#
# assembly_dir::    Directory that holds the diagnostic files.
# assembly_prefix:: Prefix used to build the log file name.
def log_diagnostic_files(assembly_dir, assembly_prefix)
  megahit_opts_fname = File.join assembly_dir, "opts.txt"
  megahit_log_fname = File.join assembly_dir, "#{assembly_prefix}.log"

  [megahit_opts_fname, megahit_log_fname].each do |fname|
    next unless File.exist? fname

    # File.read opens, reads and closes the file in one call, so no file
    # handle is left open behind us.
    contents = File.read(fname, mode: "rt").chomp

    Rya::AbortIf.logger.error { contents }
  end
end
run(exe:, forward_reads: nil, reverse_reads: nil, single_reads: nil, out_dir: nil, out_prefix: nil, num_threads: 1, preset: nil)
click to toggle source
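Runs the assembly. If it fails, retries once with --continue. If the retry also fails, logs the diagnostic files and removes the output directory so that a wrapper can restart the assembly cleanly.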
# File lib/coutinho_assembly.rb, line 34
#
# Runs the assembler. If the first attempt fails it is retried once with
# --continue. If that fails as well, the diagnostic files are logged and
# the output directory is removed so that a wrapper can rerun this method
# with the same output directory.
#
# exe::           The assembler executable.
# forward_reads:: Value for -1; the option is omitted when nil.
# reverse_reads:: Value for -2; the option is omitted when nil.
# single_reads::  Value for -r; the option is omitted when nil.
# out_dir::       Value for --out-dir.
# out_prefix::    Value for --out-prefix; the option is omitted when nil.
# num_threads::   Value for --num-cpu-threads.
# preset::        "meta-sensitive" and "meta-large" are passed through
#                 --presets, "fast" becomes "--k-list 21". Anything else
#                 (including "default") adds no option.
#
# Returns a CoutinhoAssembly::RunnerExit built from the status of the last
# attempt, its exit status, and a hash with the :final_contigs path.
def run(exe:,
        forward_reads: nil,
        reverse_reads: nil,
        single_reads: nil,
        out_dir: nil,
        out_prefix: nil,
        num_threads: 1,
        preset: nil)
  cmd = "#{exe} " \
        "--num-cpu-threads #{num_threads} " \
        "--out-dir #{out_dir}"

  # Only pass the read options that were actually supplied. Emitting a
  # flag with an empty value would make the next flag its argument.
  cmd += " -1 #{forward_reads}" if forward_reads
  cmd += " -2 #{reverse_reads}" if reverse_reads
  cmd += " -r #{single_reads}" if single_reads

  # Add the optional opts
  cmd += " --out-prefix #{out_prefix}" if out_prefix

  # For preset of 'default' or anything else, just use the megahit default.
  case preset
  when "meta-sensitive", "meta-large"
    cmd += " --presets #{preset}"
  when "fast"
    cmd += " --k-list 21"
  end

  # Run the initial assembly
  proc_status = Process.run_it cmd

  unless proc_status.exitstatus.zero?
    # The assembly failed. Since megahit has a checkpoint continue mode,
    # trying once more with --continue may save the assembly and time.
    cmd += " --continue"
    proc_status = Process.run_it cmd
  end

  # Now we check if the checkpoint assembly failed as well
  unless proc_status.exitstatus.zero?
    # Dump the megahit opts and log files into our own log first.
    log_diagnostic_files out_dir, out_prefix

    # Remove the output directory, because the retry wrapper will always
    # fail if it tries to use the same assembly directory name again.
    FileUtils.rm_r out_dir if Dir.exist? out_dir
  end

  # Without --out-prefix megahit names its result "final.contigs.fa".
  contigs_prefix = out_prefix || "final"
  outputs = { final_contigs: File.join(out_dir, "#{contigs_prefix}.contigs.fa") }

  # Return whichever proc_status was the last one to be set, either the
  # original assembly or the continued assembly.
  CoutinhoAssembly::RunnerExit.new proc_status, proc_status.exitstatus, outputs
end