class GeneValidator::Validate
Class that runs the validations (instantiated for each query)
Public Class Methods
Initializes the object. Params:
opt: A hash with the following keys: {validations:, blast_tabular_file:, blast_tabular_options:, blast_xml_file:, db:, raw_sequences:, num_threads:, fast:}
start_idx: number of the sequence from the file to start with
overall_evaluation: boolean variable for printing overall evaluation
# File lib/genevalidator/validation.rb, line 121
# Sets up the per-query validation state.
#
# The method takes no arguments: +opt+, +config+, +overview+ and +query_idx+
# are resolved as method calls on the receiver.
# NOTE(review): their definitions are outside this excerpt — presumably
# accessors/delegators exposing run-wide settings; confirm.
def initialize
  @opt = opt
  @config = config
  @overview = overview
  @query_idx = query_idx

  # No report exists yet; #validate assigns it for the current query.
  @run_output = nil
end
Public Instance Methods
# File lib/genevalidator/validation.rb, line 165
# Builds a per-position coverage map of the prediction for a single hit.
#
# Params:
# +prediction+: object responding to +length_protein+
# +hit+: object whose +hsp_list+ elements respond to +match_query_from+
#        and +match_query_to+ (1-based, inclusive positions)
# Output:
# Array with one entry per position of the prediction: 1 where at least one
# HSP of the hit aligns to that position, 0 elsewhere.
def check_hit_coverage(prediction, hit)
  blank_map = Array.new(prediction.length_protein, 0)
  hit.hsp_list.each_with_object(blank_map) do |hsp, covered|
    last = hsp.match_query_to
    first = hsp.match_query_from
    span = last - first + 1
    # Positions are 1-based, array indices are 0-based.
    covered[(first - 1)..(last - 1)] = Array.new(span, 1)
  end
end
# File lib/genevalidator/validation.rb, line 194
# Sanity-checks the list of validation tests before they are run.
#
# Params:
# +vals+: Array of validation test objects
#
# Every element must be a ValidationTest and no two elements may share the
# same +cli_name+. On either violation the error is printed with +warn+ and
# the process exits with status 1.
def check_validations(vals)
  # check the class type of the elements in the list
  raise ValidationClassError unless vals.all? { |v| v.is_a?(ValidationTest) }

  # check alias duplication
  cli_names = vals.map(&:cli_name)
  raise AliasDuplicationError if cli_names.uniq.length != cli_names.length
rescue ValidationClassError, AliasDuplicationError => e
  warn e
  exit 1
end
# File lib/genevalidator/validation.rb, line 208
# Sanity-checks the results after the validation tests have run.
#
# Params:
# +vals+: Array of validation test objects that respond to +validation_report+
#
# Requires that @run_output holds at least one validation and that every
# test produced a ValidationReport. On either violation the error is printed
# with +warn+ and the process exits with status 1.
def check_validations_output(vals)
  raise NoValidationError if @run_output.validations.empty?

  vals.each do |v|
    next if v.validation_report.is_a?(ValidationReport)

    raise ReportClassError
  end
rescue NoValidationError, ReportClassError => e
  warn e
  exit 1
end
# File lib/genevalidator/validation.rb, line 221
# Computes the success/fail tallies and the overall score of the current
# query and stores them on @run_output (+successes+, +fails+,
# +overall_score+).
#
# A validation counts as a success when its +result+ equals its +expected+
# value. It counts as a fail when the two differ and the validation is
# neither :unapplicable nor :error. The tallies are then adjusted by
# #length_validation_scores so that the two length validations weigh as one.
#
# The overall score is the share of successes among counted validations,
# scaled to 0..90 and rounded to the nearest integer; it is 0 when nothing
# was counted.
def compute_run_score
  validations = @run_output.validations
  scores = {
    successes: validations.count { |v| v.result == v.expected },
    fails: validations.count do |v|
      v.validation != :unapplicable && v.validation != :error &&
        v.result != v.expected
    end
  }
  scores = length_validation_scores(validations, scores)

  @run_output.successes = scores[:successes]
  @run_output.fails = scores[:fails]

  # The adjusted tallies may carry half points; keep them in the total so the
  # denominator really is successes + fails.
  counted = scores[:successes] + scores[:fails]
  @run_output.overall_score =
    if counted > 0
      # Float division: Integer#/ would floor the quotient and make the
      # rounding below a no-op.
      (scores[:successes] * 90.0 / counted).round
    else
      0
    end
end
# File lib/genevalidator/validation.rb, line 176
# Instantiates the validation tests for one query and keeps only those
# requested by the user.
#
# Params:
# +prediction+: the query, passed to every validation constructor
# +hits+: the hits, passed to every validation constructor
# Output:
# Array of validation objects whose downcased +cli_name+ is listed in
# @opt[:validations]. The order is the fixed order below, with the
# nucleotide-only validations (if any) placed just before the alignment one.
def create_validation_tests(prediction, hits)
  leading = [MakerQIValidation, LengthClusterValidation, LengthRankValidation,
             GeneMergeValidation, DuplicationValidation]
  candidates = leading.map { |klass| klass.new(prediction, hits) }
  init_nucleotide_only_validations(candidates, prediction, hits)
  candidates << AlignmentValidation.new(prediction, hits)

  requested = @opt[:validations]
  candidates.select { |test| requested.include?(test.cli_name.downcase) }
end
# File lib/genevalidator/validation.rb, line 261
# Emits the report of the current query: first asks @run_output to print
# itself to the console, then to generate its JSON.
# Returns whatever +generate_json+ returns.
def generate_run_output
  report = @run_output
  report.print_output_console
  report.generate_json
end
# File lib/genevalidator/validation.rb, line 188
# Appends the validations that only apply to nucleotide queries.
#
# Params:
# +val+: Array of validation objects, extended in place
# +prediction+, +hits+: passed to the validation constructors
# Output:
# +val+ when @config[:type] is :nucleotide, otherwise nil (and +val+ is left
# untouched).
def init_nucleotide_only_validations(val, prediction, hits)
  return if @config[:type] != :nucleotide

  val.push(BlastReadingFrameValidation.new(prediction, hits),
           OpenReadingFrameValidation.new(prediction, hits))
end
Since there are two length validations, it is necessary to adjust the
scores accordingly
# File lib/genevalidator/validation.rb, line 243
# Makes the two length validations (cluster and rank) weigh as a single
# validation in the success/fail tallies.
#
# Params:
# +validations+: Array of validation reports (respond to +result+,
#                +expected+ and +validation+)
# +scores+: Hash with numeric :successes and :fails tallies; modified in place
# Output:
# the same +scores+ hash
#
# The tallies are only touched when exactly one report of each length
# validation is present AND both were actually tallied, i.e. each one is
# either a success (+result+ == +expected+) or a counted fail (not
# :unapplicable / :error). Then:
# * both succeeded -> one success is removed
# * both failed    -> one fail is removed
# * they disagree  -> half a point is removed from each tally
# A length validation that was never tallied has nothing to merge;
# subtracting for it would push a tally below what was counted (possibly
# below zero).
def length_validation_scores(validations, scores)
  lcv = validations.select { |v| v.instance_of?(LengthClusterValidationOutput) }
  lrv = validations.select { |v| v.instance_of?(LengthRankValidationOutput) }
  return scores unless lcv.length == 1 && lrv.length == 1

  # Tally a report contributed to, or nil if it was not counted at all.
  tally_of = lambda do |v|
    if v.result == v.expected
      :successes
    elsif !%i[unapplicable error].include?(v.validation)
      :fails
    end
  end
  tallies = [tally_of.call(lcv.first), tally_of.call(lrv.first)]
  return scores if tallies.include?(nil)

  if tallies.uniq.length == 1
    scores[tallies.first] -= 1 # same outcome twice: counted once
  else
    scores[:successes] -= 0.5
    scores[:fails] -= 0.5
  end
  scores
end
Removes identical hits (100% coverage and at least 99% identity). Params:
prediction: Sequence object
hits: Array of Sequence objects
Output: the array of hit Sequence objects, with the identical hits removed
# File lib/genevalidator/validation.rb, line 155
# Drops the hits that are (near-)identical to the prediction.
#
# Params:
# +prediction+: the query; passed on to #check_hit_coverage
# +hits+: Array of hits whose +hsp_list+ elements respond to +pidentity+
# Output:
# the same +hits+ array, modified in place
#
# A hit is removed when every one of its HSPs has a known identity of at
# least 99 and the coverage map from #check_hit_coverage holds a single
# distinct value. An HSP with a nil +pidentity+ counts as "no data" and
# keeps the hit.
#
# NOTE(review): a hit with an empty +hsp_list+ also yields a uniform
# (all-zero) coverage map and is therefore removed — confirm this is intended.
def remove_identical_hits(prediction, hits)
  hits.delete_if do |hit|
    # nil must be tested before the comparison: nil < 99 raises NoMethodError.
    high_identity = hit.hsp_list.none? do |hsp|
      hsp.pidentity.nil? || hsp.pidentity < 99
    end
    # Coverage is only computed for hits that pass the identity test.
    high_identity && check_hit_coverage(prediction, hit).uniq.length == 1
  end
end
Validate one query and create validation report. Params:
prediction: Sequence object
hits: Array of Sequence objects
current_idx: the index number of the query
# File lib/genevalidator/validation.rb, line 135
# Runs the whole validation pipeline for one query.
#
# Params:
# +prediction+: the query (must respond to +definition+)
# +hits+: Array of hits for the query
# +current_idx+: the index number of the query
#
# Steps, in order: filter out identical hits, build and sanity-check the
# validation tests, run them, wrap the reports in a new Output stored in
# @run_output, sanity-check the reports, compute the score and emit the
# output. Returns the result of #generate_run_output.
def validate(prediction, hits, current_idx)
  filtered_hits = remove_identical_hits(prediction, hits)

  tests = create_validation_tests(prediction, filtered_hits)
  check_validations(tests)
  tests.each(&:run)

  @run_output = Output.new(current_idx, filtered_hits.length,
                           prediction.definition)
  @run_output.validations = tests.map(&:validation_report)
  check_validations_output(tests)

  compute_run_score
  generate_run_output
end