Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
MzTabFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2015.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_FORMAT_MZTABFILE_H
36 #define OPENMS_FORMAT_MZTABFILE_H
37 
38 #include <OpenMS/FORMAT/MzTab.h>
39 
40 #include <boost/math/special_functions/fpclassify.hpp>
41 
42 #include <vector>
43 #include <algorithm>
44 
45 namespace OpenMS
46 {
47  class String;
48  class SVOutStream;
// File adapter for MzTab files: declares store()/load() for MzTab objects, setters
// for optional output columns, and protected helpers named after the mzTab sections
// (metadata, protein, peptide, PSM, small molecule).
54  class OPENMS_DLLAPI MzTabFile
55  {
56 public:
    // Constructor.
58  MzTabFile();
    // Destructor (declared non-virtual).
60  ~MzTabFile();
61 
    // Map from a (String, String) key pair to a vector of peptide hits.
    // NOTE(review): presumably keyed by (run identifier, protein accession), judging by
    // the parameter names map_run_accession_to_pephits / common_identifier / protein_accession
    // used below -- confirm against the implementation.
    // NOTE(review): std::map is used here but <map> is not included directly by this
    // header (only <vector> and <algorithm> are); presumably it arrives via MzTab.h -- confirm.
62  typedef std::map<std::pair<String, String>, std::vector<PeptideHit> > MapAccPepType;
63 
64  // store MzTab file
    // Const member: takes the target filename and the MzTab object to write.
65  void store(const String& filename, const MzTab& mz_tab) const;
66 
67  // Set store behaviour of optional "reliability" and "uri" columns (default=no)
    // One setter per section (protein, peptide, PSM, small molecule) and column kind.
68  void storeProteinReliabilityColumn(bool store);
69  void storePeptideReliabilityColumn(bool store);
70  void storePSMReliabilityColumn(bool store);
71  void storeSmallMoleculeReliabilityColumn(bool store);
72  void storeProteinUriColumn(bool store);
73  void storePeptideUriColumn(bool store);
74  void storePSMUriColumn(bool store);
75  void storeSmallMoleculeUriColumn(bool store);
    // NOTE(review): GO terms are not mentioned in the comment above; presumably this
    // toggles an optional protein GO-term column in the same way -- confirm.
76  void storeProteinGoTerms(bool store);
77 
78  // load MzTab file
    // Non-const member: takes the source filename and the MzTab object to fill.
79  void load(const String& filename, MzTab& mz_tab);
80 
81 protected:
    // NOTE(review): this listing jumps from source line 81 to 91. The cross-reference
    // index of the same page attributes lines 82-90 to nine bool members:
    // store_protein_reliability_, store_peptide_reliability_, store_psm_reliability_,
    // store_smallmolecule_reliability_, store_protein_uri_, store_peptide_uri_,
    // store_psm_uri_, store_smallmolecule_uri_, store_protein_goterms_.
    // Presumably these back the store*Column()/storeProteinGoTerms() setters -- confirm.
91 
    // Section writers. Each void helper receives the section data and a StringList& sl;
    // each String-returning helper produces a single header or row string.
    // NOTE(review): presumably the void helpers append formatted lines to sl and the
    // String helpers return one tab-separated line -- confirm in the implementation.
    // Note: the parameter called `map` is an MzTabMetaData, not a std::map.
92  void generateMzTabMetaDataSection_(const MzTabMetaData& map, StringList& sl) const;
93 
94  void generateMzTabProteinSection_(const MzTabProteinSectionRows& rows, StringList& sl) const;
95 
    // Header for the protein section, built from a reference row, the number of
    // best-search-engine-score columns and the names of optional columns.
96  String generateMzTabProteinHeader_(const MzTabProteinSectionRow& reference_row, const Size n_best_search_engine_scores, const std::vector<String>& optional_columns) const;
97 
98  String generateMzTabProteinSectionRow_(const MzTabProteinSectionRow& row) const;
99 
100  void generateMzTabPeptideSection_(const MzTabPeptideSectionRows& rows, StringList& sl) const;
101 
    // Header for the peptide section; the Size arguments give the counts of ms runs,
    // best/plain search engine scores, assays and study variables.
102  String generateMzTabPeptideHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, Size assays, Size study_variables, const std::vector<String>& optional_columns) const;
103 
104  String generateMzTabPeptideSectionRow_(const MzTabPeptideSectionRow& row) const;
105 
106  void generateMzTabPSMSection_(const MzTabPSMSectionRows& rows, StringList& sl) const;
107 
108  String generateMzTabPSMHeader_(Size n_search_engine_scores, const std::vector<String>& optional_columns) const;
109 
110  String generateMzTabPSMSectionRow_(const MzTabPSMSectionRow& row) const;
111 
    // Note: the parameter called `map` is an MzTabSmallMoleculeSectionRows, which the
    // page index lists as std::vector<MzTabSmallMoleculeSectionRow>, not a std::map.
112  void generateMzTabSmallMoleculeSection_(const MzTabSmallMoleculeSectionRows& map, StringList& sl) const;
113 
    // Same parameter list as generateMzTabPeptideHeader_.
114  String generateMzTabSmallMoleculeHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, Size assays, Size study_variables, const std::vector<String>& optional_columns) const;
115 
116  String generateMzTabSmallMoleculeSectionRow_(const MzTabSmallMoleculeSectionRow& row) const;
117 
118  // auxiliary functions
    // All helpers below are static, so they work only on their arguments.
119  // extract two integers from string (e.g. search_engine_score[1]_ms_run[2] -> 1,2)
120  static std::pair<int, int> extractIndexPairsFromBrackets_(const String& s);
121 
    // Operates on the iterator range [begin, end) of PeptideIdentification objects.
    // NOTE(review): sort criterion is not visible in this header -- see implementation.
122  static void sortPSM_(std::vector<PeptideIdentification>::iterator begin, std::vector<PeptideIdentification>::iterator end);
123 
    // Operates on the iterator range [begin, end) of PeptideIdentification objects.
    // NOTE(review): presumably reduces each identification to its first hit -- confirm.
124  static void keepFirstPSM_(std::vector<PeptideIdentification>::iterator begin, std::vector<PeptideIdentification>::iterator end);
125 
    // Inputs: pep_ids, pro_ids. Outputs (by reference): two maps keyed by String that
    // hold the peptide and protein identifications per key.
    // NOTE(review): the key is presumably a run identifier, per the map names -- confirm.
127  static void partitionIntoRuns_(const std::vector<PeptideIdentification>& pep_ids,
128  const std::vector<ProteinIdentification>& pro_ids,
129  std::map<String, std::vector<PeptideIdentification> >& map_run_to_pepids,
130  std::map<String, std::vector<ProteinIdentification> >& map_run_to_proids
131  );
132 
133 
    // Input: per-run peptide identifications. Output (by reference): a MapAccPepType.
135  static void createProteinToPeptideLinks_(const std::map<String, std::vector<PeptideIdentification> >& map_run_to_pepids, MapAccPepType& map_run_accession_to_pephits);
136 
    // Returns a String derived from a single peptide hit.
138  static String extractProteinAccession_(const PeptideHit& peptide_hit);
139 
    // Returns a String derived from a single peptide hit.
141  static String extractPeptideModifications_(const PeptideHit& peptide_hit);
142 
    // Takes an OpenMS search engine name and returns a String.
    // NOTE(review): presumably a controlled-vocabulary parameter string -- confirm.
144  static String mapSearchEngineToCvParam_(const String& openms_search_engine_name);
145 
    // NOTE(review): score_type is passed by value here, unlike the const String&
    // used for openms_search_engine_name.
146  static String mapSearchEngineScoreToCvParam_(const String& openms_search_engine_name, double score, String score_type);
147 
    // The three extractNumPeptides* helpers share the same argument roles:
    // common_identifier, protein_accession and a MapAccPepType to look them up in.
    // NOTE(review): the parameter name is spelled `accesion` (one 's') in all three,
    // while createProteinToPeptideLinks_ spells it `accession`.
148  static String extractNumPeptides_(const String& common_identifier, const String& protein_accession,
149  const MapAccPepType& map_run_accesion_to_peptides);
150 
151  // mzTab definition of distinct
    // NOTE(review): takes both String arguments by value, whereas extractNumPeptides_
    // takes them by const reference.
152  static String extractNumPeptidesDistinct_(String common_identifier, String protein_accession,
153  const MapAccPepType& map_run_accesion_to_peptides);
154 
155  // same as distinct but additional constraint of uniqueness (=maps to exactly one Protein)
    // NOTE(review): also takes both String arguments by value.
156  static String extractNumPeptidesUnambiguous_(String common_identifier, String protein_accession,
157  const MapAccPepType& map_run_accesion_to_peptides);
158 
    // Returns a String -> Size map computed from the per-run protein identifications.
159  static std::map<String, Size> extractNumberOfSubSamples_(const std::map<String, std::vector<ProteinIdentification> >& map_run_to_proids);
160 
    // Header writers targeting an SVOutStream (per the page index: a stream class for
    // writing comma/tab/...-separated values files).
    // NOTE(review): n_sub_samples is a std::map passed by value, so it is copied per call.
161  static void writePeptideHeader_(SVOutStream& output, std::map<String, Size> n_sub_samples);
162 
163  static void writeProteinHeader_(SVOutStream& output, std::map<String, Size> n_sub_samples);
164 
    // Writes data for one ProteinIdentification to the given SVOutStream.
    // NOTE(review): input_filename is passed by value; the two maps are const references.
165  static void writeProteinData_(SVOutStream& output,
166  const ProteinIdentification& prot_id,
167  Size run_count,
168  String input_filename,
169  bool has_coverage,
170  const MapAccPepType& map_run_accesion_to_peptides,
171  const std::map<String, Size>& map_run_to_num_sub
172  );
173 
174  };
175 
176 } // namespace OpenMS
177 
178 #endif // OPENMS_FORMAT_MZTABFILE_H
Representation of a protein identification run.
Definition: ProteinIdentification.h:61
bool store_protein_reliability_
Definition: MzTabFile.h:82
A more convenient string class.
Definition: String.h:57
Definition: MzTab.h:579
bool store_smallmolecule_uri_
Definition: MzTabFile.h:89
std::vector< MzTabSmallMoleculeSectionRow > MzTabSmallMoleculeSectionRows
Definition: MzTab.h:692
bool store_protein_goterms_
Definition: MzTabFile.h:90
bool store_protein_uri_
Definition: MzTabFile.h:86
std::vector< MzTabProteinSectionRow > MzTabProteinSectionRows
Definition: MzTab.h:689
bool store_peptide_reliability_
Definition: MzTabFile.h:83
bool store_peptide_uri_
Definition: MzTabFile.h:87
Definition: MzTab.h:517
bool store_psm_reliability_
Definition: MzTabFile.h:84
Definition: MzTab.h:634
Representation of a peptide hit.
Definition: PeptideHit.h:55
File adapter for MzTab files.
Definition: MzTabFile.h:54
std::vector< MzTabPeptideSectionRow > MzTabPeptideSectionRows
Definition: MzTab.h:690
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:74
bool store_psm_uri_
Definition: MzTabFile.h:88
Definition: MzTab.h:608
Stream class for writing to comma/tab/...-separated values files.
Definition: SVOutStream.h:54
std::vector< MzTabPSMSectionRow > MzTabPSMSectionRows
Definition: MzTab.h:691
bool store_smallmolecule_reliability_
Definition: MzTabFile.h:85
std::map< std::pair< String, String >, std::vector< PeptideHit > > MapAccPepType
Definition: MzTabFile.h:62
Data model of MzTab files. Please see the official MzTab specification at https://code.google.com/p/mztab/.
Definition: MzTab.h:700

OpenMS / TOPP release 2.0.0 Documentation generated on Wed Mar 30 2016 16:18:40 using doxygen 1.8.5