class GeneValidator::Validate

Class that runs the validations (instantiated for each query).

Public Class Methods

new() click to toggle source

Initializes the object. Params: opt: a hash with the following keys: {validations:, blast_tabular_file:, blast_tabular_options:, blast_xml_file:, db:, raw_sequences:, num_threads:, fast:}; start_idx: number of the sequence from the file to start with; overall_evaluation: boolean variable for printing overall evaluation.

# File lib/genevalidator/validation.rb, line 121
# Sets up the per-query state: stores opt, config, overview and query_idx in
# instance variables and leaves @run_output unset until #validate builds it.
#
# NOTE(review): opt, config, overview and query_idx are neither parameters nor
# locals here; presumably they are methods reachable from the instance
# (e.g. delegated) - confirm against the class header.
def initialize
  @opt         = opt
  @config      = config
  @run_output  = nil
  @overview    = overview
  @query_idx   = query_idx
end

Public Instance Methods

check_hit_coverage(prediction, hit) click to toggle source
# File lib/genevalidator/validation.rb, line 165
# Builds a per-position coverage mask for the prediction.
#
# The mask starts as prediction.length_protein zeros; every HSP of the hit
# sets the positions from match_query_from to match_query_to (1-based,
# inclusive) to 1.
#
# @param prediction [#length_protein] the query sequence
# @param hit [#hsp_list] a hit whose HSPs respond to match_query_from/to
# @return [Array<Integer>] the coverage mask
def check_hit_coverage(prediction, hit)
  mask = Array.new(prediction.length_protein, 0)
  hit.hsp_list.each_with_object(mask) do |hsp, covered|
    last_pos  = hsp.match_query_to
    first_pos = hsp.match_query_from
    span      = last_pos - first_pos + 1
    # HSP coordinates are 1-based, array indices are 0-based.
    covered[(first_pos - 1)..(last_pos - 1)] = Array.new(span, 1)
  end
end
check_validations(vals) click to toggle source
# File lib/genevalidator/validation.rb, line 194
# Sanity-checks the list of validation tests before they are run.
#
# Every element must be a ValidationTest and no two elements may share a
# cli_name. On either violation the error is written to stderr and the
# process exits with status 1.
#
# @param vals [Array] the validation tests to check
# @return [nil] when the list is valid
def check_validations(vals)
  # Every entry has to be a validation test.
  raise ValidationClassError unless vals.all? { |test| test.is_a?(ValidationTest) }

  # CLI aliases have to be unique across the list.
  cli_names = vals.map(&:cli_name)
  raise AliasDuplicationError if cli_names.uniq.length != cli_names.length
rescue ValidationClassError, AliasDuplicationError => e
  warn e
  exit 1
end
check_validations_output(vals) click to toggle source
# File lib/genevalidator/validation.rb, line 208
# Sanity-checks the results after the validation tests have run.
#
# Fails if @run_output holds no validations at all, or if any test's
# validation_report is not a ValidationReport. On failure the error is
# written to stderr and the process exits with status 1.
#
# @param vals [Array] the validation tests that were run
# @return [Array] vals, when every check passes
def check_validations_output(vals)
  raise NoValidationError if @run_output.validations.empty?

  vals.each do |test|
    report = test.validation_report
    raise ReportClassError unless report.is_a?(ValidationReport)
  end
rescue NoValidationError, ReportClassError => e
  warn e
  exit 1
end
compute_run_score() click to toggle source
# File lib/genevalidator/validation.rb, line 221
# Tallies the validation reports in @run_output and stores the successes,
# fails and overall score back on it.
#
# A report is a success when result == expected. It is a fail when the two
# differ and its validation is neither :unapplicable nor :error. The tallies
# are then adjusted by length_validation_scores. The overall score is
# successes * 90 / (successes.to_i + fails), rounded, or 0 when that
# denominator is zero.
#
# NOTE(review): only the successes term is truncated with to_i, and with
# Integer tallies the division floors before .round is applied - confirm
# both are intended.
#
# @return [Numeric] the overall score that was stored
def compute_run_score
  reports = @run_output.validations

  passed = reports.count { |report| report.result == report.expected }
  failed = reports.count do |report|
    next false if report.validation == :unapplicable || report.validation == :error

    report.result != report.expected
  end
  tally = length_validation_scores(reports, { successes: passed, fails: failed })

  @run_output.successes = tally[:successes]
  @run_output.fails     = tally[:fails]

  total = tally[:successes].to_i + tally[:fails]
  @run_output.overall_score = total.zero? ? 0 : (tally[:successes] * 90 / total).round
end
create_validation_tests(prediction, hits) click to toggle source
# File lib/genevalidator/validation.rb, line 176
# Instantiates the validation tests for one prediction and keeps only those
# requested in @opt[:validations].
#
# Tests are built in a fixed order: the five general validations, then
# whatever init_nucleotide_only_validations appends, then the alignment
# validation. A test is kept when its down-cased cli_name is included in
# @opt[:validations].
#
# @param prediction the query sequence handed to every test
# @param hits the hits handed to every test
# @return [Array] the selected validation tests, in construction order
def create_validation_tests(prediction, hits)
  general = [
    MakerQIValidation,
    LengthClusterValidation,
    LengthRankValidation,
    GeneMergeValidation,
    DuplicationValidation
  ]
  tests = general.map { |klass| klass.new(prediction, hits) }
  init_nucleotide_only_validations(tests, prediction, hits)
  tests << AlignmentValidation.new(prediction, hits)

  requested = @opt[:validations]
  tests.select { |test| requested.include?(test.cli_name.downcase) }
end
generate_run_output() click to toggle source
# File lib/genevalidator/validation.rb, line 261
# Emits the results held in @run_output: first via print_output_console,
# then via generate_json.
#
# @return whatever generate_json returns
def generate_run_output
  output = @run_output
  output.print_output_console
  output.generate_json
end
init_nucleotide_only_validations(val, prediction, hits) click to toggle source
# File lib/genevalidator/validation.rb, line 188
# Appends the validations that apply only when @config[:type] is :nucleotide
# (the reading-frame validations) to val.
#
# @param val [Array] list of validation tests; modified in place
# @param prediction the query sequence handed to each test
# @param hits the hits handed to each test
# @return [Array, nil] val when tests were appended, nil otherwise
def init_nucleotide_only_validations(val, prediction, hits)
  return if @config[:type] != :nucleotide

  [BlastReadingFrameValidation, OpenReadingFrameValidation].each do |klass|
    val << klass.new(prediction, hits)
  end
  val
end
length_validation_scores(validations, scores) click to toggle source

Since there are two length validations, it is necessary to adjust the scores accordingly.
# File lib/genevalidator/validation.rb, line 243
# Adjusts the tallies so that the two length validations together count as
# a single validation.
#
# Applies only when there is exactly one LengthClusterValidationOutput and
# exactly one LengthRankValidationOutput among the reports:
#   both pass  -> one success is removed
#   both fail  -> one fail is removed
#   they differ -> half a success and half a fail are removed
#
# @param validations [Array] the validation reports
# @param scores [Hash] tallies under :successes and :fails; modified in place
# @return [Hash] the same scores hash
def length_validation_scores(validations, scores)
  cluster = validations.select { |report| report.instance_of?(LengthClusterValidationOutput) }
  rank    = validations.select { |report| report.instance_of?(LengthRankValidationOutput) }
  return scores unless cluster.length == 1 && rank.length == 1

  outcome = [cluster, rank].map { |reports| reports[0].result == reports[0].expected }
  case outcome
  when [true, true]
    scores[:successes] -= 1 # both passed: count them as a single success
  when [false, false]
    scores[:fails] -= 1 # both failed: count them as a single fail
  else
    scores[:successes] -= 0.5
    scores[:fails] -= 0.5
  end
  scores
end
remove_identical_hits(prediction, hits) click to toggle source

Removes identical hits (100% coverage and at least 99% identity). Params: prediction: Sequence object; hits: Array of Sequence objects. Output: the hits array itself, modified in place, without the identical hit Sequence objects.

# File lib/genevalidator/validation.rb, line 155
# Removes hits that are identical to the prediction.
#
# A hit is treated as identical when every one of its HSPs has a known
# pidentity of at least 99 and its coverage mask (see check_hit_coverage)
# holds a single distinct value. An HSP with no identity value (nil) counts
# as "not identical", so its hit is kept.
#
# @param prediction the query sequence
# @param hits [Array] hits whose hsp_list entries respond to pidentity;
#   modified in place
# @return [Array] the same hits array, without the identical hits
def remove_identical_hits(prediction, hits)
  hits.delete_if do |hit|
    # Test for nil before comparing: nil < 99 raises NoMethodError.
    differs = hit.hsp_list.any? { |hsp| hsp.pidentity.nil? || hsp.pidentity < 99 }
    next false if differs

    # NOTE(review): a mask that is uniformly 0 (e.g. a hit with an empty
    # hsp_list) also has a single distinct value - confirm that is intended.
    check_hit_coverage(prediction, hit).uniq.length == 1
  end
end
validate(prediction, hits, current_idx) click to toggle source

Validates one query and creates the validation report. Params: prediction: Sequence object; hits: Array of Sequence objects; current_idx: the index number of the query.

# File lib/genevalidator/validation.rb, line 135
# Runs the full validation pipeline for one query.
#
# prediction  - the query sequence; must respond to definition
# hits        - the hits for the query; identical hits are removed first
# current_idx - index of the query, passed on to the Output object
#
# The steps are order-dependent: the tests are checked and run before the
# Output object is created, and @run_output must be populated before it is
# checked, scored and emitted.
def validate(prediction, hits, current_idx)
  # Drop hits that are identical to the prediction.
  hits = remove_identical_hits(prediction, hits)
  vals = create_validation_tests(prediction, hits)
  # Exits the process if the list of tests is malformed.
  check_validations(vals)
  vals.each(&:run)
  # hits.length is the number of hits left after the identical ones were removed.
  @run_output = Output.new(current_idx, hits.length, prediction.definition)
  @run_output.validations = vals.map(&:validation_report)
  # Exits the process if no reports, or reports of the wrong class, were produced.
  check_validations_output(vals)

  compute_run_score
  generate_run_output
end