module NpSearch

Top level module / namespace.

Constants

VERSION

Attributes

logger[RW]
opt[RW]
sequences[RW]
sorted_sequences[R]

Public Class Methods

init(opt) click to toggle source
# File lib/npsearch.rb, line 22
# Prepares the module-level state for a run and performs the one-off setup.
#
# Stores +opt+ in @opt, passes it to ArgumentsValidators.run, resets the
# sequence collections, builds the thread pool through
# initialise_thread_pool, and finally calls create_temp_directory and
# extract_orf, in that order.
#
# opt - the options object; it is indexed with symbol keys such as
#       :num_threads, :temp_dir, :input_file and :type by the helpers.
#
# Returns whatever extract_orf returns.
def init(opt)
  @opt = opt
  # Validation happens after @opt is stored, before any other state is touched.
  ArgumentsValidators.run(@opt)
  @sequences = []
  @sorted_sequences = nil
  @pool = initialise_thread_pool
  create_temp_directory
  extract_orf
end
run() click to toggle source
# File lib/npsearch.rb, line 32
# Runs the analysis over the prepared input and writes the result files.
#
# Picks @opt[:orf] as the input when @opt[:type] is :genetic and
# @opt[:input_file] otherwise, feeds it to iterate_input_file, stores the
# collected sequences ordered by descending score in @sorted_sequences and
# then calls Output.to_fasta and Output.to_html.
#
# remove_temp_dir is called from an +ensure+ clause, so the temporary
# directory is cleaned up even when one of the steps above raises; the
# exception still propagates to the caller.
#
# Returns the result of remove_temp_dir.
def run
  input_file = @opt[:type] == :genetic ? @opt[:orf] : @opt[:input_file]
  begin
    iterate_input_file(input_file)
    @sorted_sequences = @sequences.sort_by(&:score).reverse
    Output.to_fasta(@opt[:input_file], @sorted_sequences, @opt[:type])
    Output.to_html(@opt[:input_file])
  ensure
    # Captured so the method keeps returning the cleanup result on success.
    cleanup_result = remove_temp_dir
  end
  cleanup_result
end

Private Class Methods

create_temp_directory() click to toggle source
# File lib/npsearch.rb, line 53
# Creates the directory named by @opt[:temp_dir] (FileUtils.mkdir_p also
# creates missing parents and is a no-op when the directory exists) and
# logs its location at debug level.
#
# Returns the result of the logger call.
def create_temp_directory
  temp_dir = @opt[:temp_dir]
  FileUtils.mkdir_p(temp_dir)
  logger.debug("Successfully creating temp directory at: #{temp_dir}")
end
extract_orf(input = @opt[:input_file], minsize = 90) click to toggle source

Uses getorf from the EMBOSS package to extract all ORFs.

# File lib/npsearch.rb, line 59
# Extracts open reading frames from +input+ with EMBOSS +getorf+.
#
# Does nothing when @opt[:type] is :protein. Otherwise the output path
# (<temp_dir>/input.orf.fa) is stored in @opt[:orf] and getorf is run with
# its stdout and stderr discarded.
#
# getorf is invoked with an argument vector rather than a shell string, so
# file names containing spaces or shell metacharacters are passed through
# verbatim instead of being re-interpreted by a shell.
#
# input   - path of the sequence file to scan (default: @opt[:input_file]).
# minsize - value passed to getorf's -minsize option (default: 90).
#
# Returns nil for protein input, otherwise the result of the final logger
# call.
def extract_orf(input = @opt[:input_file], minsize = 90)
  return if @opt[:type] == :protein
  logger.debug 'Attempting to extract ORF.'
  @opt[:orf] = File.join(@opt[:temp_dir], 'input.orf.fa')
  argv = ['getorf', '-sequence', input.to_s, '-outseq', @opt[:orf],
          '-minsize', minsize.to_s]
  # Logged in the familiar shell-like form; the redirection itself is done
  # through the out:/err: options below.
  logger.debug "Running: #{argv.join(' ')} >/dev/null 2>&1"
  system(*argv, out: File::NULL, err: File::NULL)
  logger.debug("EGexit Code: #{$CHILD_STATUS.exitstatus}")
end
initialise_seqs(entry) click to toggle source
# File lib/npsearch.rb, line 82
# Analyses one input entry and, if it passes every filter, scores it and
# appends it to @sequences.
#
# An entry is dropped (method returns nil) when
# * its amino-acid sequence is longer than @opt[:max_orf_length],
# * Signalp.analyse_sequence reports 'N' under the :sp key, or
# * the resulting Sequence contains a character outside A-Z / a-z.
#
# entry - an object responding to +definition+ and +aaseq+.
#
# Returns @sequences when the entry was added, nil otherwise.
def initialise_seqs(entry)
  aa_length = entry.aaseq.length
  logger.debug "-- Analysing: '#{entry.definition}' (#{aa_length})"
  return if aa_length > @opt[:max_orf_length]

  signalp = Signalp.analyse_sequence(entry.aaseq.to_s)
  return if signalp[:sp] == 'N'

  logger.debug "-- SignalP  : #{signalp}"
  candidate = Sequence.new(entry, signalp)
  # Anything that is not a plain letter counts as an illegal character.
  if candidate.seq =~ /[^A-Za-z]/
    logger.debug "-- Skipping: '#{entry.definition}' - Contains illegal characters."
    return
  end

  ScoreSequence.run(candidate, @opt)
  @sequences << candidate
end
initialise_thread_pool() click to toggle source
# File lib/npsearch.rb, line 47
# Builds the worker pool used when more than one thread is requested.
#
# Returns nil when @opt[:num_threads] is exactly 1; otherwise logs the pool
# size and returns Pool.new(@opt[:num_threads]).
def initialise_thread_pool
  thread_count = @opt[:num_threads]
  unless thread_count == 1
    logger.debug("Creating a thread pool of size #{thread_count}")
    Pool.new(thread_count)
  end
end
iterate_input_file(input_file) click to toggle source
# File lib/npsearch.rb, line 70
# Reads every FASTA entry from +input_file+ and hands it to initialise_seqs.
#
# When @opt[:num_threads] is greater than 1 each entry is scheduled on
# @pool and the pool is shut down once all entries have been queued;
# otherwise entries are processed inline.
#
# The file is opened with the block form of Bio::FlatFile.open so the
# underlying handle is closed as soon as iteration finishes (or raises)
# instead of being left open.
#
# input_file - path of the FASTA file to read.
#
# Returns the result of @pool.shutdown in threaded mode, nil otherwise.
def iterate_input_file(input_file)
  logger.debug "Iterating the Input File: #{input_file}"
  threaded = @opt[:num_threads] > 1
  Bio::FlatFile.open(Bio::FastaFormat, input_file) do |fasta|
    fasta.each_entry do |entry|
      if threaded
        @pool.schedule(entry) { |e| initialise_seqs(e) }
      else
        initialise_seqs(entry)
      end
    end
  end
  @pool.shutdown if threaded
end
remove_temp_dir() click to toggle source
# File lib/npsearch.rb, line 97
# Deletes the directory named by @opt[:temp_dir], if it exists.
#
# Returns nil when the path is not a directory; otherwise logs the deletion
# at debug level and returns the result of FileUtils.rm_rf.
def remove_temp_dir
  temp_dir = @opt[:temp_dir]
  if File.directory?(temp_dir)
    logger.debug("Deleting Temporary directory: #{temp_dir}")
    FileUtils.rm_rf(temp_dir)
  end
end