class MzIdentMLDoc

Constants

MZID_NS
MZID_NS_PREFIX

Attributes

db_sequence_cache[R]
psms_cache[R]

Public Class Methods

find(node,expression,root=false) click to toggle source

Class-level utility methods for searching from a given node.


# File lib/protk/mzidentml_doc.rb, line 97
# Runs a namespace-qualified XPath search by calling +node.find+.
#
# node       - object responding to +find(xpath, namespace)+.
# expression - XPath fragment; it is prefixed with MZID_NS_PREFIX and a colon.
# root       - when truthy the query starts with "//", otherwise with "./".
#
# Returns whatever +node.find+ returns.
def self.find(node,expression,root=false)
        axis = if root
                "//"
        else
                "./"
        end
        xpath = "#{axis}#{MZID_NS_PREFIX}:#{expression}"
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
        node.find(xpath, namespace)
end
new(path) click to toggle source
# File lib/protk/mzidentml_doc.rb, line 35
# Builds an XML::Parser for the file at +path+, parses it and stores the
# result in @document, which the query methods of this class search.
def initialize(path)
        @document = XML::Parser.file(path).parse
end

Public Instance Methods

analysis_software() click to toggle source
# File lib/protk/mzidentml_doc.rb, line 52
# Searches the whole parsed document for AnalysisSoftware elements in the
# MZID namespace and returns the result of the XPath query.
def analysis_software
        xpath = "//#{MZID_NS_PREFIX}:AnalysisSoftware"
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
        @document.find(xpath, namespace)
end
dbsequence_cache() click to toggle source
# File lib/protk/mzidentml_doc.rb, line 24
# Lazily builds and returns a Hash mapping each DBSequence's 'accession'
# attribute to its node. The index is built on the first call (a debug
# message is logged at that point) and the same Hash is returned afterwards.
def dbsequence_cache
        return @dbsequence_cache if @dbsequence_cache

        @dbsequence_cache = {}
        Constants.instance.log "Generating DB index" , :debug
        dbsequences.each do |entry|
                accession = entry.attributes['accession']
                @dbsequence_cache[accession] = entry
        end
        @dbsequence_cache
end
dbsequences() click to toggle source
# File lib/protk/mzidentml_doc.rb, line 68
# Searches the whole parsed document for DBSequence elements in the MZID
# namespace and returns the result of the XPath query.
def dbsequences
        xpath = "//#{MZID_NS_PREFIX}:DBSequence"
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
        @document.find(xpath, namespace)
end
enzymes() click to toggle source
# File lib/protk/mzidentml_doc.rb, line 48
# Searches the whole parsed document for Enzyme elements in the MZID
# namespace and returns the result of the XPath query.
def enzymes
        xpath = "//#{MZID_NS_PREFIX}:Enzyme"
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
        @document.find(xpath, namespace)
end
find(node,expression,root=false) click to toggle source
# File lib/protk/mzidentml_doc.rb, line 102
# Instance-level convenience wrapper: forwards all three arguments unchanged
# to MzIdentMLDoc.find and returns its result.
def find(node, expression, root = false)
        MzIdentMLDoc.find(node, expression, root)
end
get_best_psm_for_peptide(peptide_node) click to toggle source

<PeptideHypothesis peptideEvidence_ref="PepEv_1">

<SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/>

</PeptideHypothesis>

# File lib/protk/mzidentml_doc.rb, line 148
# Picks the highest-scoring PSM referenced by +peptide_node+.
#
# Each SpectrumIdentificationItemRef child is resolved through psms_cache
# using its 'spectrumIdentificationItem_ref' attribute, and scored with the
# float value of its MS:1002466 cvParam. When several PSMs share the top
# score, the first one encountered is returned.
#
# Returns the winning PSM node, or nil when there are no references.
def get_best_psm_for_peptide(peptide_node)
        sii_refs = find(peptide_node, "SpectrumIdentificationItemRef")
        Constants.instance.log "Searching from among #{sii_refs.length} for best psm" , :debug

        scored = sii_refs.map do |ref_node|
                psm = psms_cache[ref_node.attributes['spectrumIdentificationItem_ref']]
                [psm, get_cvParam(psm, "MS:1002466")['value'].to_f]
        end

        # max_by keeps the earliest element among equal maxima.
        winner = scored.max_by { |_psm, score| score }
        winner && winner.first
end
get_cvParam(mzidnode,accession) click to toggle source
# File lib/protk/mzidentml_doc.rb, line 107
# Looks for cvParam children of +mzidnode+ whose 'accession' attribute equals
# +accession+ and returns the first match (element 0 of the search result).
def get_cvParam(mzidnode,accession)
        matches = find(mzidnode, "cvParam[@accession='#{accession}']")
        matches[0]
end
get_dbsequence(mzidnode,accession) click to toggle source
# File lib/protk/mzidentml_doc.rb, line 111
# Returns the entry stored under +accession+ in dbsequence_cache.
#
# +mzidnode+ is accepted but not used by this implementation; the lookup
# goes through the cache only.
def get_dbsequence(mzidnode,accession)
        index = dbsequence_cache
        index[accession]
end
get_peptide_evidence_from_psm(psm_node) click to toggle source
# File lib/protk/mzidentml_doc.rb, line 181
# Resolves every PeptideEvidenceRef child of +psm_node+ to a PeptideEvidence
# element.
#
# For each reference, the 'peptideEvidence_ref' attribute is used as the id
# in a document-wide ("//") search, and the first hit is collected.
#
# Returns an Array with one entry per reference, in reference order.
def get_peptide_evidence_from_psm(psm_node)
        find(psm_node, "PeptideEvidenceRef").map do |ref_node|
                evidence_id = ref_node.attributes['peptideEvidence_ref']
                find(ref_node, "PeptideEvidence[@id='#{evidence_id}']", true)[0]
        end
end
get_peptides_for_protein(protein_node) click to toggle source

def self.get_sister_proteins(protein_node)

self.find(protein_node.parent,"ProteinDetectionHypothesis")

end

# File lib/protk/mzidentml_doc.rb, line 141
# Returns the result of searching +protein_node+ (relative "./" search) for
# PeptideHypothesis elements.
def get_peptides_for_protein(protein_node)
        hypotheses = find(protein_node, "PeptideHypothesis")
        hypotheses
end
get_protein_probability(protein_node) click to toggle source

As per PeptideShaker: assume the group probability is used for the protein if it is the group representative; otherwise 0.

# File lib/protk/mzidentml_doc.rb, line 117
# Returns a probability for +protein_node+ derived from its parent group.
#
# A protein counts as the group representative when it has an MS:1002403
# cvParam. In that case the 'value' attribute of the parent's MS:1002470
# cvParam is converted to a float and multiplied by 0.01 (presumably the
# stored value is a percentage -- confirm against the producing tool).
# Any other protein gets the Integer 0.
def get_protein_probability(protein_node)
        representative_marker = get_cvParam(protein_node, "MS:1002403")
        return 0 if representative_marker.nil?

        group_param = get_cvParam(protein_node.parent, "MS:1002470")
        group_param.attributes['value'].to_f*0.01
end
get_proteins_for_group(group_node) click to toggle source

Memoized because it gets called for every protein in a group

# File lib/protk/mzidentml_doc.rb, line 129
# Returns the ProteinDetectionHypothesis search result for +group_node+.
#
# Results are memoized in a Hash keyed by the group node: the Hash's default
# block performs the search on the first lookup of a key and stores it, so
# repeated calls with the same key reuse the stored result.
def get_proteins_for_group(group_node)
        unless @proteins_for_group_cache
                @proteins_for_group_cache = Hash.new do |cache, node|
                        cache[node] = find(node, "ProteinDetectionHypothesis")
                end
        end
        @proteins_for_group_cache[group_node]
end
get_sequence_for_peptide(peptide_node) click to toggle source
# File lib/protk/mzidentml_doc.rb, line 167
# Returns the text content of the PeptideSequence for +peptide_node+.
#
# The lookup follows two references, each resolved with a document-wide
# ("//") search: the node's 'peptideEvidence_ref' attribute selects a
# PeptideEvidence element, whose 'peptide_ref' attribute selects a Peptide
# element. The first PeptideSequence child of that Peptide supplies the text.
def get_sequence_for_peptide(peptide_node)
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"

        evidence_id = peptide_node.attributes['peptideEvidence_ref']
        evidence = peptide_node.find("//#{MZID_NS_PREFIX}:PeptideEvidence[@id='#{evidence_id}']", namespace)[0]

        peptide_id = evidence.attributes['peptide_ref']
        peptide = peptide_node.find("//#{MZID_NS_PREFIX}:Peptide[@id='#{peptide_id}']", namespace)[0]

        sequence_node = peptide.find("./#{MZID_NS_PREFIX}:PeptideSequence", namespace)[0]
        sequence_node.content
end
get_sequence_for_psm(psm_node) click to toggle source
# File lib/protk/mzidentml_doc.rb, line 175
# Returns the text content of the PeptideSequence for +psm_node+.
#
# The node's 'peptide_ref' attribute is used as the id in a document-wide
# ("//") search for a Peptide element; the first PeptideSequence child of the
# first match supplies the text.
def get_sequence_for_psm(psm_node)
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"

        peptide_id = psm_node.attributes['peptide_ref']
        peptide = psm_node.find("//#{MZID_NS_PREFIX}:Peptide[@id='#{peptide_id}']", namespace)[0]

        sequence_node = peptide.find("./#{MZID_NS_PREFIX}:PeptideSequence", namespace)[0]
        sequence_node.content
end
peptide_evidence() click to toggle source
# File lib/protk/mzidentml_doc.rb, line 60
# Searches the whole parsed document for PeptideEvidence elements in the
# MZID namespace and returns the result of the XPath query.
def peptide_evidence
        xpath = "//#{MZID_NS_PREFIX}:PeptideEvidence"
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
        @document.find(xpath, namespace)
end
peptides() click to toggle source

Peptides are referenced in many ways in mzIdentML. We define a “Peptide” as a peptide supporting a particular protein. Such peptides may encompass several PSMs.

# File lib/protk/mzidentml_doc.rb, line 85
# Searches the whole parsed document for PeptideHypothesis elements in the
# MZID namespace and returns the result of the XPath query.
def peptides
        xpath = "//#{MZID_NS_PREFIX}:PeptideHypothesis"
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
        @document.find(xpath, namespace)
end
protein_groups() click to toggle source
# File lib/protk/mzidentml_doc.rb, line 72
# Searches the whole parsed document for ProteinAmbiguityGroup elements in
# the MZID namespace and returns the result of the XPath query.
def protein_groups
        xpath = "//#{MZID_NS_PREFIX}:ProteinAmbiguityGroup"
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
        @document.find(xpath, namespace)
end
proteins() click to toggle source
# File lib/protk/mzidentml_doc.rb, line 77
# Searches the whole parsed document for ProteinDetectionHypothesis elements
# in the MZID namespace and returns the result of the XPath query.
def proteins
        xpath = "//#{MZID_NS_PREFIX}:ProteinDetectionHypothesis"
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
        @document.find(xpath, namespace)
end
psms() click to toggle source
# File lib/protk/mzidentml_doc.rb, line 64
# Searches the whole parsed document for SpectrumIdentificationItem elements
# in the MZID namespace and returns the result of the XPath query.
def psms
        xpath = "//#{MZID_NS_PREFIX}:SpectrumIdentificationItem"
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
        @document.find(xpath, namespace)
end
search_databases() click to toggle source
# File lib/protk/mzidentml_doc.rb, line 44
# Searches the whole parsed document for SearchDatabase elements in the MZID
# namespace and returns the result of the XPath query.
def search_databases
        xpath = "//#{MZID_NS_PREFIX}:SearchDatabase"
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
        @document.find(xpath, namespace)
end
source_files() click to toggle source
# File lib/protk/mzidentml_doc.rb, line 40
# Searches the whole parsed document for SourceFile elements in the MZID
# namespace and returns the result of the XPath query.
def source_files
        xpath = "//#{MZID_NS_PREFIX}:SourceFile"
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
        @document.find(xpath, namespace)
end
spectrum_queries() click to toggle source
# File lib/protk/mzidentml_doc.rb, line 56
# Searches the whole parsed document for SpectrumIdentificationResult
# elements in the MZID namespace and returns the result of the XPath query.
def spectrum_queries
        xpath = "//#{MZID_NS_PREFIX}:SpectrumIdentificationResult"
        namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
        @document.find(xpath, namespace)
end