class GeneValidator::TabularParser

This class parses the tabular output of BLAST (outfmt 6 and 7).

Attributes

column_names[R]
rows[R]
tab_results[R]
type[R]

Public Class Methods

new(tab_file = opt[:blast_tabular_file], format = opt[:blast_tabular_options], type = config[:type]) click to toggle source

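Initializes the parser: derives the column names from the format string, reads the tabular file into a list of row hashes, and creates an enumerator over those rows.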

# File lib/genevalidator/tabular_parser.rb, line 22
# Sets up the parser state from a BLAST tabular file.
#
# tab_file - path handed to analayse_tabular_file.
# format   - String of column names separated by spaces and/or commas.
#            Digits and hyphens are stripped first, so a leading outfmt
#            number such as "6 " or "7 " is tolerated.
# type     - stored as-is in @type.
#
# Stripping the outfmt number leaves a leading separator, and ", " style
# separators yield empty tokens when splitting on a single character. Those
# empty names are discarded so they cannot shift every CSV header by one.
def initialize(tab_file = opt[:blast_tabular_file],
               format = opt[:blast_tabular_options], type = config[:type])
  @column_names = format.gsub(/[-\d]/, '').split(/[ ,]/).reject(&:empty?)
  @type         = type
  @tab_results  = analayse_tabular_file(tab_file)
  @rows         = @tab_results.to_enum
end

Public Instance Methods

analayse_tabular_file(filename) click to toggle source
# File lib/genevalidator/tabular_parser.rb, line 32
# Reads the tab-separated file at +filename+ and returns its rows as an Array
# of Hashes keyed by @column_names. Lines beginning with '#' are skipped.
def analayse_tabular_file(filename)
  contents = File.read(filename)
  table = CSV.parse(contents, headers: @column_names, col_sep: "\t",
                              skip_lines: /^#/)
  table.map { |row| row.to_hash }
end
move_to_next_query()
Alias for: next
next() click to toggle source

Moves the row enumerator on to the next query.

# File lib/genevalidator/tabular_parser.rb, line 40
# Advances the row enumerator to the first row of the next query.
#
# Rows are consumed while their 'qseqid' equals that of the row currently at
# the head of the enumerator. The first row with a different 'qseqid' is only
# peeked at, never consumed, so a query represented by a single row is not
# skipped over.
#
# Raises StopIteration if the enumerator is already exhausted on entry.
# Reaching the end of the rows while advancing ends the loop quietly, because
# Kernel#loop rescues StopIteration.
def next
  current_entry = @rows.peek['qseqid']
  loop do
    break unless @rows.peek['qseqid'] == current_entry
    @rows.next
  end
end
Also aliased as: move_to_next_query
parse_next(query_id = nil) click to toggle source
# File lib/genevalidator/tabular_parser.rb, line 53
# Returns the hits built by initialise_classes for the query at the head of
# the row enumerator, then calls move_to_next_query.
#
# query_id - optional; when given and it differs from the 'qseqid' of the row
#            at the head of the enumerator, an empty Array is returned and the
#            enumerator is left untouched.
#
# Returns an empty Array as well when StopIteration is raised (for example
# because there are no rows left to peek at).
def parse_next(query_id = nil)
  upcoming_id = @rows.peek['qseqid']
  if query_id.nil? || upcoming_id == query_id
    hits = initialise_classes(upcoming_id)
    move_to_next_query
    hits
  else
    []
  end
rescue StopIteration
  []
end

Private Instance Methods

initialise_classes(current_id, tab_results = @tab_results) click to toggle source
# File lib/genevalidator/tabular_parser.rb, line 67
# Builds one Query object per distinct 'sseqid' among the rows of
# +tab_results+ whose 'qseqid' equals +current_id+.
#
# For each group, the first row is passed to Query#init_tabular_attribute,
# every row becomes an Hsp in hsp_list, and the type is set to :protein.
# Returns the Array of Query objects (empty when no row matches).
def initialise_classes(current_id, tab_results = @tab_results)
  matching_rows = tab_results.select { |row| row['qseqid'] == current_id }
  rows_by_subject = matching_rows.group_by { |row| row['sseqid'] }

  rows_by_subject.values.map do |subject_rows|
    Query.new.tap do |hit|
      hit.init_tabular_attribute(subject_rows.first)
      hit.hsp_list = subject_rows.map { |row| Hsp.new(tabular_input: row) }
      hit.type = :protein
    end
  end
end