module Ms::ErrorRate::Qvalue::Mascot::Percolator
Public Instance Methods
qvalues(datp_files, tab_txt_files, opts={})
click to toggle source
returns an array of Structs of PeptideHit(:filename, :query_title, :charge, :sequence, :mowse, :qvalue) opts =
:min_peptide_length => Integer
# File lib/ms/error_rate/qvalue/mascot/percolator.rb, line 49 def qvalues(datp_files, tab_txt_files, opts={}) min_pep_len = opts[:min_peptide_length] # we only want the top hit per query title (which should ensure that we # get the top hit per scan) hits_by_query_title = Hash.new {|h,k| h[k] = [] } datp_files.zip(tab_txt_files) do |datp_file, tab_txt_file| # build a hash based on the sequence structs = Ms::ErrorRate::Qvalue::Mascot::Percolator.tab_txt( tab_txt_file ) qvalue_by_query_rank = {} structs.each do |struct| qvalue_by_query_rank[[struct.query_num, struct.rank]] = struct.q_value end base_no_ext = File.basename(datp_file, '.*') Ms::Mascot::Dat.open(datp_file) do |dat| dat.each_peptide_hit(:by => :groups, :yield_nil => false, :with_query => true) do |hits,query| hits.each do |hit| if qval = qvalue_by_query_rank[[hit.query_num, hit.hit_num]] hit_as_struct = Ms::ErrorRate::Qvalue::Mascot::MascotPeptideHit.new(base_no_ext, query.title, query.charge, hit.sequence, hit.score, qval) hits_by_query_title[hit_as_struct.query_title] << hit_as_struct end end end end end final_hits = [] hits_by_query_title.each do |title, hits| best_hit = if hits.size == 1 hits.first else hits.sort_by(&:mowse).last end # FILTER HERE: # ONLY TAKE the BEST HIT IF it passes any filters if min_pep_len next unless best_hit.sequence.size >= min_pep_len end final_hits << best_hit end final_hits end
tab_txt(file)
click to toggle source
returns an array of Structs where the keys are the first line everything is cast properly three additional keys are available query_num, rank, sequence sequence is the amino acid sequence without the surrounding X’s and dots. (with ‘-’ substituted for ‘_’)
# File lib/ms/error_rate/qvalue/mascot/percolator.rb, line 19 def tab_txt(file) hits = [] File.open(file) do |io| # PSMId score q-value posterior_error_prob peptide proteinIds atts = io.gets.chomp.split("\t").map {|v| v.gsub('-', '_').to_sym } atts.push(:query_num, :rank, :sequence) struct_class = Struct.new("Hit", *atts) io.each do |line| (query_rank, score, qvalue, perrp, peptide, *prots ) = line.chomp.split("\t") (query, rank) = query_rank.split(';').map {|v| v.split(':').last.to_i } hits << struct_class.new(query_rank, score.to_f, qvalue.to_f, perrp.to_f, peptide, prots, query, rank, peptide.split('.')[1]) end end hits end