module GeneValidator

Top level module / namespace.

A module to validate the command line Arguments

CREDIT: some of these methods have been adapted from SequenceServer

Top level module / namespace.

Top level module / namespace.

Top level module / namespace.

Constants

Pair
Pair1
Plot
VERSION

Attributes

config[RW]
dirs[RW]
mutex[RW]
mutex_array[RW]
opt[RW]
overview[RW]
query_idx[R]

array of indexes for the start offsets of each query in the fasta file

raw_seq_file_index[R]
raw_seq_file_load[R]

Public Class Methods

extract_input_fasta_sequence(index) click to toggle source
# File lib/genevalidator.rb, line 93
def extract_input_fasta_sequence(index)
  start_offset = @query_idx[index + 1] - @query_idx[index]
  end_offset = @query_idx[index]
  IO.binread(@opt[:input_fasta_file], start_offset, end_offset)
end
init(opt, start_idx = 1) click to toggle source
# File lib/genevalidator.rb, line 24
def init(opt, start_idx = 1)
  warn '==> Analysing input arguments'
  @opt = opt
  GVArgValidation.validate_args # validates @opt
  number_of_sequences = index_the_input

  @config = setup_config(start_idx, number_of_sequences)
  @dirs = setup_dirnames(@opt[:input_fasta_file])

  @mutex       = Mutex.new
  @mutex_array = Mutex.new

  resume_from_previous_run(opt[:resumable]) unless opt[:resumable].nil?

  RawSequences.index_raw_seq_file if @opt[:raw_sequences]
end
parse_blast_output_file() click to toggle source

Params: output: filename or stream, according to the type type: file or stream Returns an iterator..

# File lib/genevalidator.rb, line 68
def parse_blast_output_file
  if @opt[:blast_xml_file]
    Bio::BlastXMLParser::XmlIterator.new(@opt[:blast_xml_file]).to_enum
  else
    TabularParser.new
  end
  ## TODO: Add a Rescue statement - e.g. if unable to create the Object...
end
produce_output() click to toggle source
# File lib/genevalidator.rb, line 99
def produce_output
  @overview = Output.generate_overview(@config[:json_output],
                                       @opt[:min_blast_hits])
  eval_text = Output.generate_evaluation_text(@overview)
  Output.print_console_footer(eval_text, @opt)

  output_files = OutputFiles.new
  output_files.write_json
  output_files.write_html(eval_text)
  output_files.write_csv
  output_files.write_summary
  output_files.print_best_fasta
end
run() click to toggle source

Parse the blast output and run validations

# File lib/genevalidator.rb, line 43
def run
  # Run BLAST on all sequences (generates @opt[:blast_xml_file])
  # if no BLAST OUTPUT file provided...
  unless @opt[:blast_xml_file] || @opt[:blast_tabular_file]
    blast_xml_fname = "#{dirs[:filename]}.blast_xml"
    opt[:blast_xml_file] = File.join(dirs[:tmp_dir], blast_xml_fname)
    BlastUtils.run_blast_on_input_file
  end
  # Obtain fasta file of all BLAST hits if running align or dup validations
  if @opt[:validations].include?('align') ||
     @opt[:validations].include?('dup')
    RawSequences.run unless @opt[:raw_sequences]
  end
  # Run Validations
  iterator = parse_blast_output_file
  Validations.new.run_validations(iterator)
  produce_output
  print_directories_locations
end
setup_dirnames(input_file) click to toggle source

Also called by json_to_gv script

# File lib/genevalidator.rb, line 78
def setup_dirnames(input_file)
  fname = File.basename(input_file, File.extname(input_file))
  out_dir = setup_output_dir(fname)
  { filename: fname,
    output_dir: out_dir,
    tmp_dir: File.join(out_dir, 'tmp'),
    json_dir:  File.join(out_dir, 'tmp/json'),
    html_file: File.join(out_dir, "#{fname}_results*.html"),
    json_file: File.join(out_dir, "#{fname}_results.json"),
    csv_file: File.join(out_dir, "#{fname}_results.csv"),
    summary_file: File.join(out_dir, "#{fname}_summary.csv"),
    fasta_file: File.join(out_dir, "#{fname}_results.fa"),
    aux_dir: File.expand_path('../aux', __dir__) }
end

Private Class Methods

assert_output_dir_does_not_exist(output_dir) click to toggle source
# File lib/genevalidator.rb, line 141
def assert_output_dir_does_not_exist(output_dir)
  return unless Dir.exist?(output_dir)
  FileUtils.rm_r(output_dir) if @opt[:force_rewrite]
  return if @opt[:force_rewrite]
  warn "The output directory (#{output_dir}) already exists."
  warn ''
  warn 'Please remove this directory before continuing.'
  warn 'Alternatively, you rerun GeneValidator with the `--force` argument,'
  warn 'which rewrites over any previous output.'
  exit 1
end
copy_blast_xml_files(prev_tmp_dir) click to toggle source
# File lib/genevalidator.rb, line 190
def copy_blast_xml_files(prev_tmp_dir)
  return if @opt[:blast_xml_file] || @opt[:blast_tabular_file]
  prev_blast_xml = Dir[File.join(prev_tmp_dir, '*blast_xml')]
  return if prev_blast_xml.empty?
  blast_xml_fname = "#{@dirs[:filename]}.blast_xml"
  @opt[:blast_xml_file] = File.join(@dirs[:tmp_dir], blast_xml_fname)
  FileUtils.cp(prev_blast_xml[0], @opt[:blast_xml_file])
end
copy_prev_json_output(prev_tmp_dir) click to toggle source
# File lib/genevalidator.rb, line 210
def copy_prev_json_output(prev_tmp_dir)
  prev_json_dir = File.join(prev_tmp_dir, 'json')
  return unless Dir.exist? prev_json_dir
  all_jsons = Dir[File.join(prev_json_dir, '*.json')]
  FileUtils.cp(all_jsons, @dirs[:json_dir])
  overview_json = Dir[File.join(prev_json_dir, 'overview.json')]
  data_jsons = all_jsons - overview_json
  parse_prev_json(data_jsons)
end
copy_raw_seq_files(prev_tmp_dir) click to toggle source
# File lib/genevalidator.rb, line 199
def copy_raw_seq_files(prev_tmp_dir)
  return if @opt[:raw_sequences]
  return unless @opt[:validations].include?('align') ||
                @opt[:validations].include?('dup')
  prev_raw_seq = Dir[File.join(prev_tmp_dir, '*raw_seq')]
  return if prev_raw_seq.empty?
  raw_seq_fname = "#{@dirs[:filename]}.blast_xml.raw_seq"
  @opt[:raw_sequences] = File.join(@dirs[:tmp_dir], raw_seq_fname)
  FileUtils.cp(prev_raw_seq[0], @opt[:raw_sequences])
end
cp_html_files(output_dir) click to toggle source
# File lib/genevalidator.rb, line 153
def cp_html_files(output_dir)
  if @opt[:output_formats].include? 'html'
    aux_files = File.expand_path('../aux/html_files/', __dir__)
    FileUtils.cp_r(aux_files, output_dir)
    FileUtils.ln_s(File.join('..', 'html_files', 'json'),
                   File.join(output_dir, 'tmp', 'json'))
  else
    Dir.mkdir(File.join(output_dir, 'tmp', 'json'))
  end
end
index_the_input() click to toggle source

create a list of index of the queries in the FASTA These offset can then be used to quickly read the input file using the start and end positions of each query.

# File lib/genevalidator.rb, line 168
def index_the_input
  fasta_content = IO.binread(@opt[:input_fasta_file])
  @query_idx = fasta_content.enum_for(:scan, /(>[^>]+)/).map do
    Regexp.last_match.begin(0)
  end
  @query_idx.push(fasta_content.length)
  @query_idx.length - 1
end
parse_prev_json(data_jsons) click to toggle source
# File lib/genevalidator.rb, line 220
def parse_prev_json(data_jsons)
  data_jsons.each do |json|
    json_contents = File.read(File.expand_path(json))
    data = JSON.parse(json_contents, symbolize_names: true)
    idx = json.match(/(\d+).json/)[1].to_i - 1
    @config[:json_output][idx] = data
    print_prev_json_to_console(data)
  end
end
print_directories_locations() click to toggle source
print_prev_json_to_console(data) click to toggle source
resume_from_previous_run(prev_dir) click to toggle source
# File lib/genevalidator.rb, line 182
def resume_from_previous_run(prev_dir)
  prev_tmp_dir = File.join(prev_dir, 'tmp')
  return unless Dir.exist? prev_tmp_dir
  copy_blast_xml_files(prev_tmp_dir)
  copy_raw_seq_files(prev_tmp_dir)
  copy_prev_json_output(prev_tmp_dir)
end
setup_config(start_idx, seq_length) click to toggle source
# File lib/genevalidator.rb, line 115
def setup_config(start_idx, seq_length)
  {
    idx: 0,
    start_idx: start_idx,

    type: BlastUtils.guess_sequence_type_from_input_file,

    json_output: Array.new(seq_length),
    run_no: 0,
    output_max: 2500 # max no. of queries in the output html file
  }
end
setup_output_dir(fname) click to toggle source

Creates the output folder and copies the auxiliar folders to this folder

# File lib/genevalidator.rb, line 130
def setup_output_dir(fname)
  dir_name = "#{fname}_" + Time.now.strftime('%Y_%m_%d_%H_%M_%S')
  default_outdir = File.join(Dir.pwd, dir_name)
  output_dir = @opt[:output_dir].nil? ? default_outdir : @opt[:output_dir]
  assert_output_dir_does_not_exist(output_dir)
  Dir.mkdir(output_dir)
  Dir.mkdir(File.join(output_dir, 'tmp'))
  cp_html_files(output_dir)
  output_dir
end