class MgNu::Parser::Sam

Constants

FIELDS

Attributes

file[R]
header[R]

Public Class Methods

new(filename = nil) click to toggle source

Creates a new SAM file parser for the given file.

# File lib/mgnu/parser/sam.rb, line 17
# Creates a parser for the SAM file at +filename+.
#
# The file is opened for reading only when it exists and is readable;
# otherwise a warning is written to $stderr and no file is opened.
# When no filename is given at all, an error is reported and the process
# exits with status 1.
#
# filename:: path to the SAM file to parse
def initialize(filename = nil)
  @header = nil
  @file = nil
  # process_header looks reference names up in this hash, so it has to
  # exist before the first header line is parsed.
  @references = {}

  unless filename
    error "MgNu::Parser::Sam.new(): need a SAM file"
    exit(1)
  end

  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  unless File.exist?(filename) && File.readable?(filename)
    $stderr.puts "WARNING: SAM file (#{filename}) is missing or unreadable"
    return
  end

  # Disabled pre-scan of reference names, kept for reference:
  # # find all the reference sequences
  # # skip header lines (^@) and then only save the 3rd column
  # # from the sam file input, only return unique names, then
  # # split on newlines
  # `egrep -v '^@' #{filename} | cut -f3 | uniq`.split(/\n/).each do |ref|
  #   if @references.has_key?(ref)
  #     $stderr.puts "Already a reference by name (#{ref})"
  #     $stderr.puts "... skipping"
  #   else
  #     @references[ref] = MgNu::Parser::Sam::Reference.new(:name => ref)
  #   end
  # end

  @file = File.open(filename)
end

Public Instance Methods

each() { |pair| ... } click to toggle source

Overrides the enumerable iterator. MgNu::Parser::Sam yields one MgNu::Parser::Sam::Pair (the first and second alignment of a read pair) per iteration. The file is read as iteration proceeds.

# File lib/mgnu/parser/sam.rb, line 45
# Reads the SAM file line by line and yields one MgNu::Parser::Sam::Pair
# for every two buffered alignments that share a basename.
#
# Header lines (starting with "@") are collected and handed to
# process_header as soon as the first alignment line is seen, or at end of
# file when there are no alignment lines. Alignments for which
# matched_and_paired? is false are skipped.
#
# Without a block an Enumerator is returned. With a block the return value
# is the underlying file object, as before.
def each
  return enum_for(:each) unless block_given?

  header_lines = []
  # alignments still waiting for their mate, keyed by read basename
  pending = {}

  @file.each do |raw|
    next if raw =~ /^\s*$/
    line = raw.chomp

    if line.start_with?('@')
      header_lines << line
      next
    end

    unless header_lines.empty?
      @header = process_header(header_lines)
      header_lines.clear
    end

    # zip already produces [field, value] pairs; Hash[] takes that array
    # directly. Splatting it would turn whole pairs into keys and values.
    alignment_attrs = Hash[FIELDS.zip(line.split("\t"))]
    # TODO last field needs to be globbed into array
    alignment = MgNu::Parser::Sam::Alignment.new(alignment_attrs)
    next unless alignment.matched_and_paired?

    basename = alignment.basename
    key = alignment.first_read? ? :first : :second
    read_pair = pending[basename]
    if read_pair
      read_pair[key] = alignment
      # the pair is named after the basename its two reads share
      yield MgNu::Parser::Sam::Pair.new(basename, read_pair[:first], read_pair[:second])
      pending.delete(basename)
    else
      pending[basename] = { key => alignment }
    end
  end

  # A file holding only header lines never reaches the alignment branch,
  # so flush whatever header is still buffered.
  @header = process_header(header_lines) unless header_lines.empty?

  @file
end
process_header(buffer) click to toggle source
# File lib/mgnu/parser/sam.rb, line 77
# Builds a MgNu::Parser::Sam::Header from the buffered header lines.
#
# @HD lines supply the VN and SO values of the header object. Each @SQ
# line with an SN tag is looked up in @references by name; a missing entry
# is created and stored (with a warning on $stderr), and its LN value is
# recorded as an integer when present.
#
# buffer:: array of header lines (each starting with "@")
#
# Returns the populated header object.
def process_header(buffer)
  # Nothing else visible here sets @references, so make sure the lookups
  # below have a hash to work with.
  @references ||= {}
  hdr = MgNu::Parser::Sam::Header.new

  buffer.each do |line|
    case line
    when /^@HD/
      # \S+ stops at the next separator or at end of line, so a tag is
      # read correctly whether or not it is the last one on the line.
      version = line[/\sVN:(\S+)/, 1]
      hdr.vn = version if version
      sort_order = line[/\sSO:(\S+)/, 1]
      hdr.so = sort_order if sort_order
    when /^@SQ/
      ref_name = line[/\sSN:(\S+)/, 1]
      next unless ref_name

      if @references.key?(ref_name)
        ref = @references[ref_name]
      else
        $stderr.puts "WARNING: reference from header not found in alignments"
        # create a ref
        ref = MgNu::Parser::Sam::Reference.new(:name => ref_name)
        @references[ref_name] = ref
      end

      # LN must be all digits up to the next separator or end of line.
      length = line[/\sLN:(\d+)(?:\s|\z)/, 1]
      ref.ln = length.to_i if length
    end
  end

  hdr
end