class MzIdentMLDoc
Constants
- MZID_NS
- MZID_NS_PREFIX
Attributes
db_sequence_cache[R]
psms_cache[R]
Public Class Methods
find(node,expression,root=false)
click to toggle source
Class-level utility method for searching from a given node.
# File lib/protk/mzidentml_doc.rb, line 97
# Runs a namespace-qualified XPath query for +expression+ starting from +node+.
#
# node       - any object responding to find(xpath, namespace_spec).
# expression - element name (optionally followed by a predicate); the
#              MZID_NS_PREFIX prefix is prepended automatically.
# root       - when true the query is prefixed with "//", otherwise with "./".
#
# Returns whatever node.find returns for the assembled query.
def self.find(node,expression,root=false)
  axis = root ? "//" : "./"
  xpath = "#{axis}#{MZID_NS_PREFIX}:#{expression}"
  namespace_spec = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  node.find(xpath, namespace_spec)
end
new(path)
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 35
# Parses the file at +path+ and keeps the parsed document in @document,
# which every query method of this class searches.
def initialize(path)
  @document = XML::Parser.file(path).parse
end
Public Instance Methods
analysis_software()
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 52
# Returns every AnalysisSoftware element in the document.
# Delegates to the shared +find+ helper (root=true => "//" search) so the
# namespace prefix/URI handling is written in exactly one place.
def analysis_software
  find(@document, "AnalysisSoftware", true)
end
dbsequence_cache()
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 24
# Lazily builds and memoizes a Hash mapping each DBSequence's 'accession'
# attribute to its node.
#
# The index is assembled in a local variable and only stored in
# @dbsequence_cache once it is complete. If building fails part-way, the next
# call starts over instead of returning an empty or partial index.
#
# Returns the Hash of accession => DBSequence node.
def dbsequence_cache
  return @dbsequence_cache if @dbsequence_cache

  Constants.instance.log "Generating DB index" , :debug
  index = {}
  dbsequences.each do |db_sequence|
    index[db_sequence.attributes['accession']] = db_sequence
  end
  @dbsequence_cache = index
end
dbsequences()
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 68
# Returns every DBSequence element in the document.
# Uses the shared +find+ helper (root=true => "//" search) rather than
# rebuilding the namespaced XPath by hand.
def dbsequences
  find(@document, "DBSequence", true)
end
enzymes()
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 48
# Returns every Enzyme element in the document.
# Uses the shared +find+ helper (root=true => "//" search) rather than
# rebuilding the namespaced XPath by hand.
def enzymes
  find(@document, "Enzyme", true)
end
find(node,expression,root=false)
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 102
# Instance-level convenience wrapper: forwards all three arguments unchanged
# to the class method MzIdentMLDoc.find and returns its result.
def find(node,expression,root=false)
  MzIdentMLDoc.find(node, expression, root)
end
get_best_psm_for_peptide(peptide_node)
click to toggle source
<PeptideHypothesis peptideEvidence_ref="PepEv_1">
<SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/>
</PeptideHypothesis>
# File lib/protk/mzidentml_doc.rb, line 148
# Picks the highest-scoring PSM among those referenced by a PeptideHypothesis.
#
# Each SpectrumIdentificationItemRef child of +peptide_node+ is resolved
# through psms_cache by its 'spectrumIdentificationItem_ref' attribute. The
# score is the 'value' of the cvParam with accession MS:1002466, read as a Float.
# NOTE(review): MS:1002466 is presumably the search engine's PSM score term -
# confirm against the PSI-MS controlled vocabulary.
#
# Returns the PSM node with the largest score (the earliest one on ties), or
# nil when the peptide has no references.
def get_best_psm_for_peptide(peptide_node)
  refs = self.find(peptide_node,"SpectrumIdentificationItemRef")
  Constants.instance.log "Searching from among #{refs.length} for best psm" , :debug

  winner = nil
  top_score = nil
  refs.each do |ref_node|
    # Resolved through the cache instead of a document-wide XPath search
    candidate = self.psms_cache[ref_node.attributes['spectrumIdentificationItem_ref']]
    candidate_score = self.get_cvParam(candidate,"MS:1002466")['value'].to_f
    next unless top_score.nil? || candidate_score > top_score

    winner = candidate
    top_score = candidate_score
  end
  winner
end
get_cvParam(mzidnode,accession)
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 107
# Looks up the cvParam directly under +mzidnode+ whose 'accession' attribute
# equals +accession+.
#
# Returns the first match (index 0 of the search result).
def get_cvParam(mzidnode,accession)
  matches = self.find(mzidnode,"cvParam[@accession=\'#{accession}\']")
  matches[0]
end
get_dbsequence(mzidnode,accession)
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 111
# Returns the DBSequence node registered under +accession+ in dbsequence_cache.
#
# +mzidnode+ is accepted but not consulted: the lookup is served from the
# accession index rather than from an XPath search
# (DBSequence[@accession='...'] from the document root).
def get_dbsequence(mzidnode,accession)
  sequences_by_accession = self.dbsequence_cache
  sequences_by_accession[accession]
end
get_peptide_evidence_from_psm(psm_node)
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 181
# Collects the PeptideEvidence nodes referenced by a PSM.
#
# For every PeptideEvidenceRef child of +psm_node+, the 'peptideEvidence_ref'
# attribute is used as the id in a document-wide PeptideEvidence search, and
# the first hit is kept.
#
# Returns an Array with one entry per PeptideEvidenceRef, in document order.
def get_peptide_evidence_from_psm(psm_node)
  evidence_nodes = []
  ref_nodes = self.find(psm_node,"PeptideEvidenceRef")
  ref_nodes.each do |ref_node|
    target_id = ref_node.attributes['peptideEvidence_ref']
    hits = self.find(ref_node,"PeptideEvidence[@id=\'#{target_id}\']",true)
    evidence_nodes << hits[0]
  end
  evidence_nodes
end
get_peptides_for_protein(protein_node)
click to toggle source
# Returns the ProteinDetectionHypothesis nodes located directly under the
# parent of +protein_node+ (the result therefore includes protein_node's own
# siblings at that level).
def self.get_sister_proteins(protein_node)
  group_node = protein_node.parent
  self.find(group_node,"ProteinDetectionHypothesis")
end
# File lib/protk/mzidentml_doc.rb, line 141
# Returns the PeptideHypothesis nodes found directly under +protein_node+
# (a relative "./" search, not a document-wide one).
def get_peptides_for_protein(protein_node)
  self.find(protein_node, "PeptideHypothesis", false)
end
get_protein_probability(protein_node)
click to toggle source
As per PeptideShaker: the group probability is used for the protein if it is the group representative; otherwise the probability is 0.
# File lib/protk/mzidentml_doc.rb, line 117
# Probability assigned to a protein node.
#
# A protein counts as the group representative when it carries a cvParam with
# accession MS:1002403. In that case the 'value' of the parent node's
# MS:1002470 cvParam is read as a Float and multiplied by 0.01.
# NOTE(review): the 0.01 factor suggests the stored value is a percentage -
# confirm against the producing tool.
#
# Returns 0 for proteins that are not the group representative.
def get_protein_probability(protein_node)
  # MS:1002403 marks the group representative
  return 0 if self.get_cvParam(protein_node,"MS:1002403").nil?

  group_param = self.get_cvParam(protein_node.parent,"MS:1002470")
  group_param.attributes['value'].to_f*0.01
end
get_proteins_for_group(group_node)
click to toggle source
Memoized because it gets called for every protein in a group
# File lib/protk/mzidentml_doc.rb, line 129
# Returns the ProteinDetectionHypothesis nodes directly under +group_node+.
#
# Results are kept in @proteins_for_group_cache, keyed by the group node
# itself, so the search runs once per distinct key.
# NOTE(review): cache hits rely on the node object's hash/eql? behaviour.
def get_proteins_for_group(group_node)
  @proteins_for_group_cache ||= {}
  unless @proteins_for_group_cache.key?(group_node)
    @proteins_for_group_cache[group_node] = self.find(group_node,"ProteinDetectionHypothesis")
  end
  @proteins_for_group_cache[group_node]
end
get_sequence_for_peptide(peptide_node)
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 167
# Resolves the PeptideSequence text for a PeptideHypothesis node.
#
# Follows the reference chain:
#   peptide_node['peptideEvidence_ref'] -> PeptideEvidence[@id]
#   PeptideEvidence['peptide_ref']      -> Peptide[@id]
#   Peptide/PeptideSequence             -> content
#
# All three lookups go through the shared +find+ helper, so the namespace
# prefix/URI pair is not repeated here. The first two are document-wide
# (root=true); the last is relative to the Peptide node.
#
# Returns the content of the PeptideSequence element.
def get_sequence_for_peptide(peptide_node)
  evidence_ref = peptide_node.attributes['peptideEvidence_ref']
  evidence = find(peptide_node, "PeptideEvidence[@id='#{evidence_ref}']", true)[0]
  pep_ref = evidence.attributes['peptide_ref']
  peptide = find(peptide_node, "Peptide[@id='#{pep_ref}']", true)[0]
  find(peptide, "PeptideSequence")[0].content
end
get_sequence_for_psm(psm_node)
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 175
# Resolves the PeptideSequence text for a PSM node.
#
# The PSM's 'peptide_ref' attribute is used as the id in a document-wide
# Peptide search; the content of that Peptide's PeptideSequence child is
# returned. Both lookups go through the shared +find+ helper, so the
# namespace prefix/URI pair is not repeated here.
def get_sequence_for_psm(psm_node)
  pep_ref = psm_node.attributes['peptide_ref']
  peptide = find(psm_node, "Peptide[@id='#{pep_ref}']", true)[0]
  find(peptide, "PeptideSequence")[0].content
end
peptide_evidence()
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 60
# Returns every PeptideEvidence element in the document.
# Uses the shared +find+ helper (root=true => "//" search) rather than
# rebuilding the namespaced XPath by hand.
def peptide_evidence
  find(@document, "PeptideEvidence", true)
end
peptides()
click to toggle source
Peptides are referenced in many ways in mzIdentML. We define a "Peptide" as a peptide supporting a particular protein. Such peptides may encompass several PSMs.
# File lib/protk/mzidentml_doc.rb, line 85
# Returns every PeptideHypothesis element in the document; these are the
# nodes this class treats as "peptides".
# Uses the shared +find+ helper (root=true => "//" search) rather than
# rebuilding the namespaced XPath by hand.
def peptides
  find(@document, "PeptideHypothesis", true)
end
protein_groups()
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 72
# Returns every ProteinAmbiguityGroup element in the document.
# Uses the shared +find+ helper (root=true => "//" search) rather than
# rebuilding the namespaced XPath by hand.
def protein_groups
  find(@document, "ProteinAmbiguityGroup", true)
end
proteins()
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 77
# Returns every ProteinDetectionHypothesis element in the document.
# Uses the shared +find+ helper (root=true => "//" search) rather than
# rebuilding the namespaced XPath by hand.
def proteins
  find(@document, "ProteinDetectionHypothesis", true)
end
psms()
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 64
# Returns every SpectrumIdentificationItem element (PSM) in the document.
# Uses the shared +find+ helper (root=true => "//" search) rather than
# rebuilding the namespaced XPath by hand.
def psms
  find(@document, "SpectrumIdentificationItem", true)
end
search_databases()
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 44
# Returns every SearchDatabase element in the document.
# Uses the shared +find+ helper (root=true => "//" search) rather than
# rebuilding the namespaced XPath by hand.
def search_databases
  find(@document, "SearchDatabase", true)
end
source_files()
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 40
# Returns every SourceFile element in the document.
# Uses the shared +find+ helper (root=true => "//" search) rather than
# rebuilding the namespaced XPath by hand.
def source_files
  find(@document, "SourceFile", true)
end
spectrum_queries()
click to toggle source
# File lib/protk/mzidentml_doc.rb, line 56
# Returns every SpectrumIdentificationResult element in the document.
# Uses the shared +find+ helper (root=true => "//" search) rather than
# rebuilding the namespaced XPath by hand.
def spectrum_queries
  find(@document, "SpectrumIdentificationResult", true)
end