class MzID::ParserSax

Class to parse an mzIdentML file (.mzid) in a memory-efficient manner. It can parse large files on which a DOM-based parser (which is what most mzid parsers use) will fail. The caveat is that the results must be written to a CSV file.

Public Class Methods

new(file, use_pbar = nil, tda_flag = true) click to toggle source
# File lib/mzid/parser_sax.rb, line 203
# Build the lookup tables needed by write_to_csv by running several
# Ox SAX passes over the mzIdentML file.
#
# file     - path of the .mzid file; it is re-opened for every pass
# use_pbar - truthy to run an extra counting pass first and hand the
#            element counts to the handlers (presumably to size their
#            progress bars); nil or false skips the counting pass
# tda_flag - stored as @tda_flag; write_to_csv drops the "decoy"
#            column when it is falsy
def initialize(file, use_pbar = nil, tda_flag = true)
  @use_pbar = use_pbar
  @mzid_file = file
  @tda_flag = tda_flag

  # Counting pass. count_handler stays nil when it is skipped, and the
  # handlers below key off that, so use_pbar = false behaves like nil
  # instead of calling a method on a nil counter.
  count_handler = nil
  if @use_pbar
    count_handler = CounterHandler.new
    File.open(@mzid_file) { |f| Ox.sax_parse(count_handler, f) }
    @num_spec = count_handler.spec_count
  end

  # cache DBSequence elements
  dbseq_handler = DBSequenceHandler.new(count_handler ? count_handler.dbseq_count : nil)
  File.open(@mzid_file) { |f| Ox.sax_parse(dbseq_handler, f) }
  dbseq_handler.pbar.finish unless dbseq_handler.pbar.nil?
  @dbseq_h = dbseq_handler.dbseq_h

  # cache Peptide elements (sequences and their modifications)
  pep_handler = PeptideHandler.new(count_handler ? count_handler.pep_count : nil)
  File.open(@mzid_file) { |f| Ox.sax_parse(pep_handler, f) }
  pep_handler.pbar.finish unless pep_handler.pbar.nil?
  @pep_h = pep_handler.pep_h
  @mod_h = pep_handler.mod_h

  # create/cache PeptideEvent elements; this handler is given the
  # DBSequence cache built above
  pep_ev_handler = PeptideEventHandler.new(@dbseq_h, count_handler ? count_handler.pepev_count : nil)
  File.open(@mzid_file) { |f| Ox.sax_parse(pep_ev_handler, f) }
  pep_ev_handler.pbar.finish unless pep_ev_handler.pbar.nil?
  @pep_ev_h = pep_ev_handler.pep_ev_h
end

Public Instance Methods

write_to_csv(outfile="result.csv", show_mods=true) click to toggle source

Write the output to the specified CSV file (columns are tab-separated).

# File lib/mzid/parser_sax.rb, line 243
# Write the identification results to a tab-separated file.
#
# A final SAX pass is run with a SpectraIDHandler; the handler is given
# a callback that writes one row per PeptideEvidence reference of the
# spectrum hash it receives.
#
# outfile   - path of the file to write (default "result.csv")
# show_mods - when true, append a "mods" column holding
#             "location;signed_delta" pairs joined with "|"
#
# The "decoy" column is omitted when @tda_flag is falsy.
def write_to_csv(outfile="result.csv", show_mods=true)
  # Options are given as keywords: on Ruby 3+ CSV.open raises
  # ArgumentError when they are passed as a positional Hash.
  CSV.open(outfile, "w", :col_sep => "\t") do |csv|
    header = ["#spec_num", "peptide", "spec_prob", "decoy", "prot_ids", "start", "end", "num_prot"]
    header.push("mods") if show_mods
    header.delete("decoy") unless @tda_flag
    csv << header

    # Named row_writer rather than `proc` so Kernel#proc is not shadowed.
    row_writer = Proc.new do |spec_h|
      # peptide reference/seq
      pep_ref = spec_h[:peptide_ref].to_sym
      pep_seq = @pep_h[pep_ref]
      mods = @mod_h[pep_ref]
      # peptide evidence list
      pep_ev_refs = spec_h[:peptideEvidence_ref]
      # number of proteins with matching peptide
      num_prot = pep_ev_refs.size

      # The mods string depends only on the peptide, so build it once
      # per spectrum rather than once per evidence row.
      modstr = nil
      if show_mods && !mods.nil?
        modstr = mods.keys.map do |loc|
          val = mods[loc].to_i
          # Integer#to_s already carries the "-" of a negative delta;
          # only positive deltas need an explicit sign.
          [loc, val > 0 ? "+#{val}" : val.to_s].join(";")
        end.join("|")
      end

      # for each PeptideEvidence entry ...
      pep_ev_refs.each do |pep_ev_ref|
        pep_ev = @pep_ev_h[pep_ev_ref]
        start_pos = pep_ev.get_start_pos
        end_pos = pep_ev.get_end_pos
        prot_id = pep_ev.get_prot_id
        is_decoy = pep_ev.get_is_decoy

        row = [spec_h[:id], pep_seq, spec_h[:spec_prob], is_decoy, prot_id, start_pos, end_pos, num_prot]
        # index 3 is the decoy flag, matching the header handling above
        row.delete_at(3) unless @tda_flag
        row.push(modstr) if show_mods
        csv << row
      end
    end

    spec_handler = SpectraIDHandler.new(@dbseq_h, @pep_h, @pep_ev_h, row_writer, @use_pbar ? @num_spec : nil)
    File.open(@mzid_file) { |f| Ox.sax_parse(spec_handler, f) }
    spec_handler.pbar.finish unless spec_handler.pbar.nil?
  end
end