class NpSearch::ArgumentsValidators
A class that validates the command-line options.
Public Class Methods
run(opt)
click to toggle source
# File lib/npsearch/arg_validator.rb, line 12
# Validates the command line options hash and normalises it in place.
#
# The input file path is replaced by its absolute form, the detected
# sequence type is stored under :type and the checked thread count under
# :num_threads. The SignalP check only warns; the file checks may terminate
# the process.
#
# @param opt [Hash] options; reads :input_file, :num_threads, :signalp_path
# @return [Hash] the same hash, updated
def run(opt)
  input = opt[:input_file]
  assert_file_present('input fasta file', input)

  input = File.expand_path(input)
  opt[:input_file] = input
  assert_input_file_not_empty(input)
  assert_input_file_probably_fasta(input)

  opt[:type] = assert_input_sequence(input)
  opt[:num_threads] = check_num_threads(opt[:num_threads])
  assert_binaries('SignalP 4.1 Script', opt[:signalp_path])

  logger.debug "The validated OPT hash contains: #{opt}"
  opt
end
Private Class Methods
assert_binaries(desc, bin)
click to toggle source
# File lib/npsearch/arg_validator.rb, line 106
# Warns (log + $stderr) when the given binary cannot be found via command?.
# Never aborts: a missing binary only produces a warning.
#
# @param desc [String] human readable name of the binary, used in messages
# @param bin [#to_s] command name or path to check
# @return [nil]
def assert_binaries(desc, bin)
  logger.debug "Checking #{desc} binary at: #{bin}."
  unless command?(bin.to_s)
    message = "NpSearch is unable to use the #{desc} at #{bin}"
    logger.warn message
    $stderr.puts message
  end
end
assert_file_present(desc, file, exit_code = 1)
click to toggle source
# File lib/npsearch/arg_validator.rb, line 26
# Ensures that the given file exists; otherwise logs a fatal message, echoes
# it to $stderr and exits the process.
#
# @param desc [String] description of the file, used in messages
# @param file [String, nil] path to check (a nil path counts as missing)
# @param exit_code [Integer] exit status used when the file is missing
# @return [nil] when the file exists
def assert_file_present(desc, file, exit_code = 1)
  logger.debug "Testing if the #{desc} exists: '#{file}'."
  found = file && File.exist?(File.expand_path(file))
  unless found
    message = "*** Error: Couldn't find the #{desc}: '#{file}'."
    logger.fatal message
    $stderr.puts message
    exit exit_code
  end
end
assert_input_file_not_empty(file)
click to toggle source
# File lib/npsearch/arg_validator.rb, line 35
# Ensures that the input file is not zero bytes long; otherwise logs a fatal
# message, echoes it to $stderr and exits with status 1.
#
# @param file [String] path to the input file
# @return [nil] when the file is not empty
def assert_input_file_not_empty(file)
  logger.debug "Testing if the input file ('#{file}') is empty."
  if File.zero?(File.expand_path(file))
    message = "*** Error: The input_file ('#{file}') seems to be empty."
    logger.fatal message
    $stderr.puts message
    exit 1
  end
end
assert_input_file_probably_fasta(file)
click to toggle source
# File lib/npsearch/arg_validator.rb, line 44
# Checks that the input file looks like FASTA, i.e. that its first line
# starts with '>'.
#
# On failure a fatal message is logged, echoed to $stderr and the process
# exits with status 1.
#
# @param file [String] path to the input file
# @return [true] when the first line starts with '>'
def assert_input_file_probably_fasta(file)
  logger.debug("Testing whether the input, ('#{file}') is a fasta file.")
  # `gets` returns nil at EOF (`readline` would raise EOFError), so an empty
  # file is reported through the regular error message below.
  first_line = File.open(file, 'r', &:gets)
  return true if first_line.to_s.start_with?('>')

  error_msg = "*** Error: The input file (#{file}) does not seem to be" \
              ' a fasta file.'
  logger.fatal error_msg
  $stderr.puts error_msg
  exit 1
end
assert_input_sequence(file)
click to toggle source
# File lib/npsearch/arg_validator.rb, line 57
# Determines the sequence type of the input file via type_of_sequences.
# When no single type is reported (nil), logs a fatal message, echoes it to
# $stderr and exits with status 1.
#
# @param file [String] path to the input file
# @return [Object] the non-nil value returned by type_of_sequences
def assert_input_sequence(file)
  detected = type_of_sequences(file)
  if detected.nil?
    message = '*** Error: The input files seems to contain a mixture of' \
              ' both protein and nucleotide data.' \
              ' Please correct this and try again.'
    logger.fatal message
    $stderr.puts message
    exit 1
  end
  detected
end
check_num_threads(num_threads)
click to toggle source
# File lib/npsearch/arg_validator.rb, line 90
# Coerces and sanity-checks the requested number of threads.
#
# Values below 1 are replaced by 1 (with a warning). Values above 256 are
# kept but trigger a warning. Warnings go to both the logger and $stderr.
#
# @param num_threads [Integer, String] requested thread count
# @return [Integer] the thread count to use (always >= 1)
# @raise [ArgumentError, TypeError] if the value cannot be converted by
#   Kernel#Integer
def check_num_threads(num_threads)
  logger.debug "Checking the number of threads: #{num_threads}"
  num_threads = Integer(num_threads)

  unless num_threads > 0
    warn_msg = 'Number of threads can not be lower than 1. Changing' \
               ' number of threads to 1'
    logger.warn warn_msg
    $stderr.puts warn_msg
    num_threads = 1
  end

  if num_threads > 256
    warn_msg = "Number of threads set at #{num_threads} is unusually high."
    logger.warn warn_msg
    $stderr.puts warn_msg
  end

  # Always hand back the count: previously the high-thread branch returned
  # the result of `puts` (nil), which wiped the caller's setting.
  num_threads
end
command?(command)
click to toggle source
Return `true` if the given command exists and is executable.
# File lib/npsearch/arg_validator.rb, line 115
# Checks whether `which` can locate the given command.
#
# The command is handed to `which` as a single argument without going
# through a shell, so paths containing spaces or shell metacharacters are
# checked literally instead of being word-split or interpreted.
# NOTE(review): as a consequence shell expansions such as a leading '~' are
# no longer applied; callers should pass an expanded path.
#
# @param command [#to_s] command name or path
# @return [Boolean] true if `which` exits successfully, false otherwise
#   (including when `which` itself cannot be run)
def command?(command)
  system('which', command.to_s, out: File::NULL, err: File::NULL) || false
end
guess_sequence_type(seq)
click to toggle source
# File lib/npsearch/arg_validator.rb, line 83
# Guesses whether a sequence string is nucleotide or protein.
#
# Everything that is not an ASCII letter, plus the letters N and X (either
# case), is stripped before guessing. Sequences with fewer than 10 remaining
# characters are considered too short to classify.
#
# @param seq [String] raw sequence text
# @return [Symbol, nil] :genetic when Bio::Sequence#guess(0.9) reports
#   Bio::Sequence::NA, :protein otherwise, nil when the sequence is too short
def guess_sequence_type(seq)
  residues = seq.gsub(/[^A-Z]|[NX]/i, '')
  return if residues.length < 10 # conservative

  guessed = Bio::Sequence.new(residues).guess(0.9)
  if guessed == Bio::Sequence::NA
    :genetic
  else
    :protein
  end
end
type_of_sequences(file)
click to toggle source
Determines the sequence type of the file based on its first 500 lines.
# File lib/npsearch/arg_validator.rb, line 69
# Determines the sequence type of a file from its first 500 lines.
#
# The text is split on FASTA definition lines and each chunk is classified
# with guess_sequence_type; chunks that cannot be classified (nil) are
# ignored.
#
# @param file [String] path to the input file
# @return [Object, nil] the single guessed type, or nil when no type or more
#   than one distinct type was guessed
def type_of_sequences(file)
  logger.debug 'Checking the type of sequence in the input file based' \
               ' on the first 500 lines.'
  head = File.foreach(file).first(500).join("\n")
  # the first sequence does not need to have a fasta definition line
  records = head.split(/^>.*$/).reject(&:empty?)
  # distinct, non-nil types across all records
  kinds = records.map { |record| guess_sequence_type(record) }.uniq.compact
  logger.debug " The guessed typed of Sequences are: #{kinds}"
  kinds.length == 1 ? kinds.first : nil
end