class MgNu::Parser::Sam
Constants
- FIELDS
Attributes
file[R]
header[R]
Public Class Methods
new(filename = nil)
click to toggle source
create a new SAM file parser
# File lib/mgnu/parser/sam.rb, line 17
# Create a new SAM file parser.
#
# filename - path to the SAM file to parse. When omitted (nil), a message is
#            reported through `error` and the process exits with status 1.
#            NOTE(review): `error` is not defined in the visible source;
#            presumably a project-level helper - confirm.
#
# If +filename+ is given but does not exist or is not readable, nothing is
# opened and @file is left unset (nil); no exception is raised here.
def initialize(filename = nil)
  @header = nil
  # process_header looks references up in this hash, so it must exist even
  # though the pre-population below is disabled.
  @references = {}

  if filename
    # File.exist? is used because File.exists? was deprecated and then
    # removed in Ruby 3.2.
    if File.exist?(filename) && File.readable?(filename)
      # Disabled pre-population of @references, kept for reference:
      #
      # # find all the reference sequences
      # # skip header lines (^@) and then only save the 3rd column
      # # from the sam file input, only return unique names, then
      # # split on newlines
      # `egrep -v '^@' #{filename} | cut -f3 | uniq`.split(/\n/).each do |ref|
      #   if @references.has_key?(ref)
      #     $stderr.puts "Already a reference by name (#{ref})"
      #     $stderr.puts "... skipping"
      #   else
      #     @references[ref] = MgNu::Parser::Sam::Reference.new(:name => ref)
      #   end
      # end
      @file = File.open(filename)
    end # end of exists and readable file checks
  else
    error "MgNu::Parser::Sam.new(): need a SAM file"
    exit(1)
  end # end of if/else filename
end
Public Instance Methods
each() { |pair| ... }
click to toggle source
Overrides Enumerable's each. MgNu::Parser::Sam
yields a MgNu::Parser::Sam::Pair for every pair of matched, paired alignments. Iteration happens while the file is being read.
# File lib/mgnu/parser/sam.rb, line 45
# Iterate over the SAM file, yielding one MgNu::Parser::Sam::Pair per
# completed read pair.
#
# Lines are handled as the file is read:
# * blank lines are skipped;
# * lines starting with "@" are collected and handed to process_header,
#   whose result is stored in @header;
# * every other line is split on tabs, zipped with FIELDS and turned into a
#   MgNu::Parser::Sam::Alignment. Alignments for which matched_and_paired?
#   is false are ignored.
#
# The first alignment seen for a basename is buffered; when a second one
# with the same basename arrives, the pair is yielded and the buffer entry
# is dropped.
#
# Yields a MgNu::Parser::Sam::Pair built from (basename, first, second).
# Returns an Enumerator when called without a block, otherwise @file.
def each
  return enum_for(:each) unless block_given?

  header_buffer = []    # header lines seen since the last flush
  alignment_buffer = {} # basename => { :first / :second => alignment }

  @file.each do |line|
    next if line =~ /^\s*$/
    line.chomp!

    if line =~ /^@/
      header_buffer << line
      next
    end

    unless header_buffer.empty?
      @header = process_header(header_buffer)
      header_buffer.clear
    end

    # Hash[] receives the array of [field, value] pairs as ONE argument;
    # splatting it would pair the pairs up as alternating keys and values.
    # zip drops values beyond FIELDS.length and pads missing ones with nil.
    # TODO last field needs to be globbed into array
    alignment_attrs = Hash[FIELDS.zip(line.split("\t"))]
    alignment = MgNu::Parser::Sam::Alignment.new(alignment_attrs)
    next unless alignment.matched_and_paired?

    key = alignment.first_read? ? :first : :second
    basename = alignment.basename
    read_pair = alignment_buffer[basename]

    if read_pair.nil?
      alignment_buffer[basename] = { key => alignment }
    else
      read_pair[key] = alignment
      # The pair is named after the shared basename used as the buffer key.
      yield MgNu::Parser::Sam::Pair.new(basename, read_pair[:first], read_pair[:second])
      alignment_buffer.delete(basename)
    end
  end

  # A file made up only of header lines never reaches the flush above.
  @header = process_header(header_buffer) unless header_buffer.empty?

  @file
end
process_header(buffer)
click to toggle source
# File lib/mgnu/parser/sam.rb, line 77
# Build a MgNu::Parser::Sam::Header from buffered header lines.
#
# buffer - Array of header line Strings (lines beginning with "@").
#
# For @HD lines the VN: and SO: values are copied onto the header object.
# For @SQ lines the SN: value is looked up in @references; a missing entry
# triggers a warning on $stderr and a new MgNu::Parser::Sam::Reference is
# registered under that name. When the line also carries an LN: value it is
# stored on the reference as an Integer. Other header line types are ignored.
#
# Returns the populated MgNu::Parser::Sam::Header.
def process_header(buffer)
  # The hash may never have been set up (its pre-population in initialize is
  # commented out), so create it on first use.
  @references ||= {}
  hdr = MgNu::Parser::Sam::Header.new

  buffer.each do |line|
    case line
    when /^@HD/
      # \S+ stops at the next tab and also matches the last field of a
      # chomped line, which has no trailing whitespace.
      hdr.vn = $1 if line =~ /VN:(\S+)/
      hdr.so = $1 if line =~ /SO:(\S+)/
    when /^@SQ/
      ref = nil
      if line =~ /SN:(\S+)/
        ref_name = $1
        # verify this ref is in the @references hash (from initialize())
        if @references.has_key?(ref_name)
          ref = @references[ref_name]
        else
          $stderr.puts "WARNING: reference from header not found in alignments"
          # create a ref
          ref = MgNu::Parser::Sam::Reference.new(:name => ref_name)
          @references[ref_name] = ref
        end
      end
      # LN is only recorded when the line also named a reference.
      ref.ln = $1.to_i if ref && line =~ /LN:(\d+)/
    end
  end

  hdr
end