class MzID::ParserSax
Class to parse an mzIdentML file (.mzid) in a memory-efficient manner. It can parse large files on which DOM-based parsers (e.g., most mzid parsers) fail. The caveat is that the parsed results must be written to a CSV file.
Public Class Methods
new(file, use_pbar = nil, tda_flag = true)
click to toggle source
# File lib/mzid/parser_sax.rb, line 203
#
# Sets up the parser by streaming the mzid file through Ox several times and
# caching the DBSequence, Peptide and PeptideEvidence lookups that
# #write_to_csv later reads.
#
# file::     path of the .mzid file; it is reopened for every SAX pass
# use_pbar:: when truthy, an extra counting pass is run first and the element
#            totals are handed to each handler (the handlers expose +pbar+,
#            presumably a progress bar sized by that total). +nil+ and
#            +false+ both skip the counting pass.
# tda_flag:: stored for #write_to_csv, which drops the "decoy" column when
#            this is false
def initialize(file, use_pbar = nil, tda_flag = true)
  @use_pbar = use_pbar
  @mzid_file = file
  @tda_flag = tda_flag

  # Element totals are only gathered on request. count_handler stays nil
  # otherwise, and every later use is guarded on the handler itself rather
  # than on @use_pbar.nil?, so that use_pbar = false no longer ends up
  # calling a method on nil.
  count_handler = nil
  if @use_pbar
    count_handler = CounterHandler.new
    File.open(@mzid_file) { |f| Ox.sax_parse(count_handler, f) }
    @num_spec = count_handler.spec_count
  end

  # cache DBSequence elements
  dbseq_handler = DBSequenceHandler.new(count_handler && count_handler.dbseq_count)
  File.open(@mzid_file) { |f| Ox.sax_parse(dbseq_handler, f) }
  dbseq_handler.pbar.finish unless dbseq_handler.pbar.nil?
  @dbseq_h = dbseq_handler.dbseq_h

  # cache Peptide elements (sequences and their modifications)
  pep_handler = PeptideHandler.new(count_handler && count_handler.pep_count)
  File.open(@mzid_file) { |f| Ox.sax_parse(pep_handler, f) }
  pep_handler.pbar.finish unless pep_handler.pbar.nil?
  @pep_h = pep_handler.pep_h
  @mod_h = pep_handler.mod_h

  # create/cache PeptideEvidence elements; this handler is given the
  # DBSequence cache built above
  pep_ev_handler = PeptideEventHandler.new(@dbseq_h, count_handler && count_handler.pepev_count)
  File.open(@mzid_file) { |f| Ox.sax_parse(pep_ev_handler, f) }
  pep_ev_handler.pbar.finish unless pep_ev_handler.pbar.nil?
  @pep_ev_h = pep_ev_handler.pep_ev_h
end
Public Instance Methods
write_to_csv(outfile="result.csv", show_mods=true)
click to toggle source
Write the output to the specified CSV file (fields are tab-separated).
# File lib/mzid/parser_sax.rb, line 243
#
# Streams the spectrum identifications of the mzid file into a
# tab-separated file, one row per (spectrum, PeptideEvidence) pair.
#
# outfile::   path of the file to create or overwrite
# show_mods:: when true, append a "mods" column. Each modification is
#             written as "<location>;<signed integer delta>" and the
#             modifications of one peptide are joined with "|". The delta
#             is truncated with to_i.
#
# The "decoy" column is omitted when the parser was built with a false
# tda_flag.
def write_to_csv(outfile="result.csv", show_mods=true)
  header = ["#spec_num", "peptide", "spec_prob", "decoy", "prot_ids", "start", "end", "num_prot"]
  header.push("mods") if show_mods
  header.delete("decoy") unless @tda_flag

  # The options are given as keywords: a positional Hash in this slot is
  # rejected with ArgumentError on Ruby 3.x.
  CSV.open(outfile, "w", col_sep: "\t") do |csv|
    csv << header

    # Invoked by the SAX handler with one hash per identified spectrum.
    write_rows = Proc.new do |spec_h|
      # peptide reference/seq
      pep_ref = spec_h[:peptide_ref].to_sym
      pep_seq = @pep_h[pep_ref]
      mods = @mod_h[pep_ref]

      # The modification string depends only on the peptide, so it is built
      # once per spectrum rather than once per evidence entry.
      mod_str = nil
      if show_mods && !mods.nil?
        mod_str = mods.keys.map do |loc|
          delta = mods[loc].to_i
          # Integer#to_s already carries the minus sign, so only positive
          # deltas need an explicit sign (previously -17 became "--17" and
          # 0 became "-0").
          [loc, delta > 0 ? "+#{delta}" : delta.to_s].join(";")
        end.join("|")
      end

      # peptide evidence list; its length is the number of proteins with a
      # matching peptide
      pep_ev_refs = spec_h[:peptideEvidence_ref]
      num_prot = pep_ev_refs.size

      pep_ev_refs.each do |pep_ev_ref|
        pep_ev = @pep_ev_h[pep_ev_ref]

        row = [spec_h[:id], pep_seq, spec_h[:spec_prob]]
        row.push(pep_ev.get_is_decoy) if @tda_flag
        row.push(pep_ev.get_prot_id, pep_ev.get_start_pos, pep_ev.get_end_pos, num_prot)
        row.push(mod_str) if show_mods
        csv << row
      end
    end

    # @num_spec is only set when progress totals were collected.
    spec_handler = SpectraIDHandler.new(@dbseq_h, @pep_h, @pep_ev_h, write_rows, @use_pbar ? @num_spec : nil)
    File.open(@mzid_file) { |f| Ox.sax_parse(spec_handler, f) }
    spec_handler.pbar.finish unless spec_handler.pbar.nil?
  end
end