Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
MzXMLHandler.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2015.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Andreas Bertsch $
32 // $Authors: Marc Sturm $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_FORMAT_HANDLERS_MZXMLHANDLER_H
36 #define OPENMS_FORMAT_HANDLERS_MZXMLHANDLER_H
37 
39 #include <OpenMS/FORMAT/Base64.h>
45 
46 #include <stack>
47 
48 namespace OpenMS
49 {
50  class MetaInfoInterface;
51 
52  namespace Internal
53  {
54 
// SAX-style handler for mzXML data, templated on the map (experiment) type it works on.
// NOTE(review): this listing carries the documentation generator's own line numbers and
// skips some of them (e.g. 69, 76, 92, 122-123), so members and statements used below
// may live on lines that are not shown here.
61  template <typename MapType>
62  class MzXMLHandler :
63  public XMLHandler
64  {
65 public:
// Constructor for a mutable map: keeps the address of `exp` in exp_ and leaves cexp_ null.
// Stores `logger` in logger_ and runs init_() to set up the CV term tables.
68  MzXMLHandler(MapType& exp, const String& filename, const String& version, ProgressLogger& logger) :
70  XMLHandler(filename, version),
71  exp_(&exp),
72  cexp_(0),
73  decoder_(),
74  nesting_level_(0),
75  skip_spectrum_(false),
77  consumer_(NULL),
78  scan_count_(0),
79  logger_(logger)
80  {
81  init_();
82  }
83 
// Constructor for a const map: keeps the address of `exp` in cexp_ and leaves exp_ null.
// NOTE(review): startElement() dereferences exp_, so a handler built this way is
// presumably only meant for writeTo() - confirm against the callers.
85  MzXMLHandler(const MapType& exp, const String& filename, const String& version, const ProgressLogger& logger) :
86  XMLHandler(filename, version),
87  exp_(0),
88  cexp_(&exp),
89  decoder_(),
90  nesting_level_(0),
91  skip_spectrum_(false),
93  consumer_(NULL),
94  scan_count_(0),
95  logger_(logger)
96  {
97  init_();
98  }
99 
// Virtual destructor with an empty body
101  virtual ~MzXMLHandler() {}
103 
104  // Docu in base class
105  virtual void endElement(const XMLCh* const uri, const XMLCh* const local_name, const XMLCh* const qname);
106 
107  // Docu in base class
108  virtual void startElement(const XMLCh* const uri, const XMLCh* const local_name, const XMLCh* const qname, const xercesc::Attributes& attributes);
109 
110  // Docu in base class
111  virtual void characters(const XMLCh* const chars, const XMLSize_t length);
112 
// Declared here only; the definition is not part of this listing
114  void writeTo(std::ostream& os);
115 
// Replaces the handler's options_ with a copy of `options`
117  void setOptions(const PeakFileOptions& options)
118  {
119  options_ = options;
120  }
121 
// NOTE(review): the signature of this accessor is elided in the listing; the body returns scan_count_
124  {
125  return scan_count_;
126  }
127 
// NOTE(review): the signature of this setter is elided in the listing; the body stores `consumer` in consumer_
130  {
131  consumer_ = consumer;
132  }
133 
134 private:
// Fills cv_terms_ with six ';'-separated term lists. The list index is the first argument
// later passed to cvStringToEnum_() (0 = polarity, 2 = ionization method, 3 = mass analyzer,
// 4 = detector, 5 = resolution method); the position of a term inside its list is
// significant, which is why the lists contain empty entries.
// NOTE(review): the listing skips one line after each of the lists 2-5 (145, 148, 151, 154).
136  void init_()
137  {
138  cv_terms_.resize(6);
139  //Polarity
140  String("any;+;-").split(';', cv_terms_[0]);
141  //Scan type
142  // is no longer used cv_terms_[1] is empty now
143  //Ionization method
144  String(";ESI;EI;CI;FAB;;;;;;;;;;;;;APCI;;;;;;;;MALDI").split(';', cv_terms_[2]);
146  //Mass analyzer
147  String(";Quadrupole;Quadrupole Ion Trap;;;TOF;Magnetic Sector;FT-ICR;").split(';', cv_terms_[3]);
149  //Detector
150  String(";EMT;;;Faraday Cup;;;;;Channeltron;Daly;Microchannel plate").split(';', cv_terms_[4]);
152  //Resolution method
153  String(";FWHM;TenPercentValley;Baseline").split(';', cv_terms_[5]);
155  /* // OLD:
156  cv_terms_.resize(6);
157  //Polarity
158  String("any;+;-").split(';',cv_terms_[0]);
159  //Scan type
160  // is no longer used cv_terms_[1] is empty now
161  //Ionization method
162  String(";ESI;EI;CI;FAB;TSP;MALDI;FD;FI;PD;SI;TI;API;ISI;CID;CAD;HN;APCI;APPI;ICP").split(';',cv_terms_[2]);
163  //Mass analyzer
164  String(";Quadrupole;Quadrupole Ion Trap;;;TOF;Magnetic Sector;FT-ICR;").split(';',cv_terms_[3]);
165  //Detector
166  String(";EMT;Daly;;Faraday Cup;;;;Channeltron").split(';',cv_terms_[4]);
167  //Resolution method
168  String(";FWHM;TenPercentValley;Baseline").split(';',cv_terms_[5]);
169  */
170  }
171 
172 protected:
173 
// Peak type of the map this handler is instantiated for
175  typedef typename MapType::PeakType PeakType;
178 
// Map passed to the const constructor (null when the mutable constructor was used)
182  const MapType* cexp_;
183 
186 
191 
// NOTE(review): the header of this aggregate and all members except skip_data are elided;
// it is used below as the element type `SpectrumData` of spectrum_data_
200  {
206  bool skip_data;
207  };
208 
// Spectra collected from the input that have not yet been handed to the map/consumer
210  std::vector< SpectrumData > spectrum_data_;
212 
215 
218 
221 
224 
227 
// Writes one empty XML element per meta value of `meta` to `os`.
//   os     - output stream
//   meta   - source of the key/value pairs
//   indent - number of tab characters written in front of each element
//   tag    - element name to use
// Keys starting with '#' are skipped. The value goes through writeXMLEscape();
// NOTE(review): the key itself is written unescaped - confirm keys cannot contain XML special characters.
229  inline void writeUserParam_(std::ostream& os, const MetaInfoInterface& meta, int indent = 4, String tag = "nameValue")
230  {
231  std::vector<String> keys; // Vector to hold keys to meta info
232  meta.getKeys(keys);
233 
234  for (std::vector<String>::const_iterator it = keys.begin(); it != keys.end(); ++it)
235  {
236  if ((*it)[0] != '#') // internally used meta info start with '#'
237  {
238  os << String(indent, '\t') << "<" << tag << " name=\"" << *it << "\" value=\"" << writeXMLEscape(meta.getMetaValue(*it)) << "\"/>\n";
239  }
240  }
241  }
242 
// Data processing entries gathered while parsing; startElement() attaches them to every new spectrum
244  std::vector<DataProcessing> data_processing_;
245 
// Turns the text collected in spectrum_data.char_rest_ into peaks of spectrum_data.spectrum.
// NOTE(review): the signature is elided in the listing; the body works on `spectrum_data`
// and a type named SpectrumType.
// Values are decoded as double when precision_ is "64" and as float otherwise, and are
// consumed as consecutive (m/z, intensity) pairs.
254  {
255  typedef typename SpectrumType::PeakType PeakType;
256 
257  //std::cout << "reading scan" << "\n";
258  if (spectrum_data.char_rest_ == "") // no peaks
259  {
260  return;
261  }
262 
263  //remove whitespaces from binary data
264  //this should not be necessary, but linebreaks inside the base64 data are unfortunately no exception
265  spectrum_data.char_rest_.removeWhitespaces();
266 
267  if (spectrum_data.precision_ == "64")
268  {
269  std::vector<double> data;
270  if (spectrum_data.compressionType_ == "zlib")
271  {
// last argument is `true` only on this zlib path
272  decoder_.decode(spectrum_data.char_rest_, Base64::BYTEORDER_BIGENDIAN, data, true);
273  }
274  else
275  {
// NOTE(review): the statement of this branch (line 276) is elided in the listing
277  }
// the encoded text is no longer needed once it has been decoded
278  spectrum_data.char_rest_ = "";
279  PeakType peak;
280  //push_back the peaks into the container
// NOTE(review): nothing visible here checks that `data` holds at least 2 * peak_count_
// values before indexing it - confirm this is guaranteed elsewhere
281  for (Size n = 0; n < (2 * spectrum_data.peak_count_); n += 2)
282  {
283  // check if peak is in the specified m/z and intensity range
// NOTE(review): the condition itself (lines 284-285) is elided in the listing
286  {
287  peak.setMZ(data[n]);
288  peak.setIntensity(data[n + 1]);
289  spectrum_data.spectrum.push_back(peak);
290  }
291  }
292  }
293  else //precision 32
294  {
295  std::vector<float> data;
296  if (spectrum_data.compressionType_ == "zlib")
297  {
298  decoder_.decode(spectrum_data.char_rest_, Base64::BYTEORDER_BIGENDIAN, data, true);
299  }
300  else
301  {
// NOTE(review): the statement of this branch (line 302) is elided in the listing
303  }
304  spectrum_data.char_rest_ = "";
305  PeakType peak;
306  //push_back the peaks into the container
// NOTE(review): same unchecked indexing of `data` as in the 64 bit branch above
307  for (Size n = 0; n < (2 * spectrum_data.peak_count_); n += 2)
308  {
// NOTE(review): the range check condition (lines 309-310) is elided in the listing
311  {
312  peak.setMZ(data[n]);
313  peak.setIntensity(data[n + 1]);
314  spectrum_data.spectrum.push_back(peak);
315  }
316  }
317  }
318  }
319 
// Processes the batch held in spectrum_data_, passes every spectrum on and empties the batch.
// NOTE(review): the signature is elided in the listing; endElement() calls
// populateSpectraWithData_() to flush, which presumably is this function.
// Throws Exception::ParseError if any spectrum of the batch failed in the first loop.
327  {
328 
329  // Whether spectrum should be populated with data
330  if (options_.getFillData())
331  {
332  size_t errCount = 0;
333 #ifdef _OPENMP
334 #pragma omp parallel for
335 #endif
336  for (SignedSize i = 0; i < (SignedSize)spectrum_data_.size(); i++)
337  {
338  // parallel exception catching and re-throwing business
// NOTE(review): errCount is incremented inside a critical section below but read here
// without synchronization - confirm this is acceptable for the OpenMP build
339  if (!errCount) // no need to parse further if already an error was encountered
340  {
341  try
342  {
// NOTE(review): the first statement of the try block (line 343) is elided in the listing
344  if (options_.getSortSpectraByMZ() && !spectrum_data_[i].spectrum.isSorted())
345  {
346  spectrum_data_[i].spectrum.sortByPosition();
347  }
348  }
349  catch (...)
350  {
// exceptions are only counted here; a single ParseError is thrown after the loop
351  #pragma omp critical(HandleException)
352  ++errCount;
353  }
354  }
355  }
356  if (errCount != 0)
357  {
358  throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, file_, "Error during parsing of binary data.");
359  }
360  }
361 
362  // Append all spectra
363  for (Size i = 0; i < spectrum_data_.size(); i++)
364  {
365  if (consumer_ != NULL)
366  {
367  consumer_->consumeSpectrum(spectrum_data_[i].spectrum);
// NOTE(review): the condition guarding the following block (line 368) is elided in the listing
369  {
370  exp_->addSpectrum(spectrum_data_[i].spectrum);
371  }
372  }
373  else
374  {
375  exp_->addSpectrum(spectrum_data_[i].spectrum);
376  }
377  }
378 
379  // Delete batch
380  spectrum_data_.clear();
381  }
382 
383 private:
// Default constructor is declared private; no definition is visible in this listing
385  MzXMLHandler();
386 
// Transcoded XML attribute names; they start out as 0 and are filled by the init function further below
387  static const XMLCh* s_value_;
388  static const XMLCh* s_count_;
389  static const XMLCh* s_type_;
390  static const XMLCh* s_name_;
391  static const XMLCh* s_version_;
392  static const XMLCh* s_filename_;
393  static const XMLCh* s_filetype_;
394  static const XMLCh* s_filesha1_;
395  static const XMLCh* s_completiontime_;
396  static const XMLCh* s_precision_;
397  static const XMLCh* s_byteorder_;
398  static const XMLCh* s_pairorder_;
399  static const XMLCh* s_compressionType_;
400  static const XMLCh* s_precursorintensity_;
401  static const XMLCh* s_precursorcharge_;
402  static const XMLCh* s_windowwideness_;
403  static const XMLCh* s_mslevel_;
404  static const XMLCh* s_peakscount_;
405  static const XMLCh* s_polarity_;
406  static const XMLCh* s_scantype_;
407  static const XMLCh* s_filterline_;
408  static const XMLCh* s_retentiontime_;
409  static const XMLCh* s_startmz_;
410  static const XMLCh* s_endmz_;
411  static const XMLCh* s_first_;
412  static const XMLCh* s_last_;
413  static const XMLCh* s_phone_;
414  static const XMLCh* s_email_;
415  static const XMLCh* s_uri_;
416  static const XMLCh* s_num_;
417  static const XMLCh* s_intensitycutoff_;
418  static const XMLCh* s_centroided_;
419  static const XMLCh* s_deisotoped_;
420  static const XMLCh* s_chargedeconvoluted_;
421 
422  // init all the static members, which is necessary because otherwise the undefined order will cause problems
// NOTE(review): the signature (line 423) is elided in the listing; startElement() calls
// initStaticMembers_(), which presumably is this function.
// The function-local flag `init` makes the transcoding run only on the first call.
424  {
425  static bool init(false);
426  if (!init)
427  {
428  s_value_ = xercesc::XMLString::transcode("value");
429  s_count_ = xercesc::XMLString::transcode("scanCount");
430  s_type_ = xercesc::XMLString::transcode("type");
431  s_name_ = xercesc::XMLString::transcode("name");
432  s_version_ = xercesc::XMLString::transcode("version");
433  s_filename_ = xercesc::XMLString::transcode("fileName");
434  s_filetype_ = xercesc::XMLString::transcode("fileType");
435  s_filesha1_ = xercesc::XMLString::transcode("fileSha1");
436  s_completiontime_ = xercesc::XMLString::transcode("completionTime");
437  s_precision_ = xercesc::XMLString::transcode("precision");
438  s_byteorder_ = xercesc::XMLString::transcode("byteOrder");
439  s_pairorder_ = xercesc::XMLString::transcode("pairOrder");
440  s_compressionType_ = xercesc::XMLString::transcode("compressionType");
441  s_precursorintensity_ = xercesc::XMLString::transcode("precursorIntensity");
442  s_precursorcharge_ = xercesc::XMLString::transcode("precursorCharge");
443  s_windowwideness_ = xercesc::XMLString::transcode("windowWideness");
444  s_mslevel_ = xercesc::XMLString::transcode("msLevel");
445  s_peakscount_ = xercesc::XMLString::transcode("peaksCount");
446  s_polarity_ = xercesc::XMLString::transcode("polarity");
447  s_scantype_ = xercesc::XMLString::transcode("scanType");
448  s_filterline_ = xercesc::XMLString::transcode("filterLine");
449  s_retentiontime_ = xercesc::XMLString::transcode("retentionTime");
450  s_startmz_ = xercesc::XMLString::transcode("startMz");
451  s_endmz_ = xercesc::XMLString::transcode("endMz");
452  s_first_ = xercesc::XMLString::transcode("first");
453  s_last_ = xercesc::XMLString::transcode("last");
454  s_phone_ = xercesc::XMLString::transcode("phone");
455  s_email_ = xercesc::XMLString::transcode("email");
456  s_uri_ = xercesc::XMLString::transcode("URI");
457  s_num_ = xercesc::XMLString::transcode("num");
458  s_intensitycutoff_ = xercesc::XMLString::transcode("intensityCutoff");
459  s_centroided_ = xercesc::XMLString::transcode("centroided");
460  s_deisotoped_ = xercesc::XMLString::transcode("deisotoped");
461  s_chargedeconvoluted_ = xercesc::XMLString::transcode("chargeDeconvoluted");
462 
463  init = true;
464  }
465  return;
466  }
467 
468  };
469 
470  //--------------------------------------------------------------------------------
471 
472  // this cannot be moved into a function as VS2008 does not allow more than 31 static members in a function .. don't ask...
// Out-of-class definitions of the static attribute-name pointers, all initialized to 0.
// NOTE(review): the listing elides several definitions (lines 490, 498, 500, 502, 504, 534
// and 540 of the embedded numbering), which leaves `template <typename MapType>` headers
// without a visible declaration below; the missing statements are not reconstructed here.
473  template <typename MapType>
474  const XMLCh * MzXMLHandler<MapType>::s_value_ = 0;
475  template <typename MapType>
476  const XMLCh * MzXMLHandler<MapType>::s_count_ = 0;
477  template <typename MapType>
478  const XMLCh * MzXMLHandler<MapType>::s_type_ = 0;
479  template <typename MapType>
480  const XMLCh * MzXMLHandler<MapType>::s_name_ = 0;
481  template <typename MapType>
482  const XMLCh * MzXMLHandler<MapType>::s_version_ = 0;
483  template <typename MapType>
484  const XMLCh * MzXMLHandler<MapType>::s_filename_ = 0;
485  template <typename MapType>
486  const XMLCh * MzXMLHandler<MapType>::s_filetype_ = 0;
487  template <typename MapType>
488  const XMLCh * MzXMLHandler<MapType>::s_filesha1_ = 0;
489  template <typename MapType>
491  template <typename MapType>
492  const XMLCh * MzXMLHandler<MapType>::s_precision_ = 0;
493  template <typename MapType>
494  const XMLCh * MzXMLHandler<MapType>::s_byteorder_ = 0;
495  template <typename MapType>
496  const XMLCh * MzXMLHandler<MapType>::s_pairorder_ = 0;
497  template <typename MapType>
499  template <typename MapType>
501  template <typename MapType>
503  template <typename MapType>
505  template <typename MapType>
506  const XMLCh * MzXMLHandler<MapType>::s_mslevel_ = 0;
507  template <typename MapType>
508  const XMLCh * MzXMLHandler<MapType>::s_peakscount_ = 0;
509  template <typename MapType>
510  const XMLCh * MzXMLHandler<MapType>::s_polarity_ = 0;
511  template <typename MapType>
512  const XMLCh * MzXMLHandler<MapType>::s_scantype_ = 0;
513  template <typename MapType>
514  const XMLCh * MzXMLHandler<MapType>::s_filterline_ = 0;
515  template <typename MapType>
516  const XMLCh * MzXMLHandler<MapType>::s_retentiontime_ = 0;
517  template <typename MapType>
518  const XMLCh * MzXMLHandler<MapType>::s_startmz_ = 0;
519  template <typename MapType>
520  const XMLCh * MzXMLHandler<MapType>::s_endmz_ = 0;
521  template <typename MapType>
522  const XMLCh * MzXMLHandler<MapType>::s_first_ = 0;
523  template <typename MapType>
524  const XMLCh * MzXMLHandler<MapType>::s_last_ = 0;
525  template <typename MapType>
526  const XMLCh * MzXMLHandler<MapType>::s_phone_ = 0;
527  template <typename MapType>
528  const XMLCh * MzXMLHandler<MapType>::s_email_ = 0;
529  template <typename MapType>
530  const XMLCh * MzXMLHandler<MapType>::s_uri_ = 0;
531  template <typename MapType>
532  const XMLCh * MzXMLHandler<MapType>::s_num_ = 0;
533  template <typename MapType>
535  template <typename MapType>
536  const XMLCh * MzXMLHandler<MapType>::s_centroided_ = 0;
537  template <typename MapType>
538  const XMLCh * MzXMLHandler<MapType>::s_deisotoped_ = 0;
539  template <typename MapType>
541 
542  template <typename MapType>
543  void MzXMLHandler<MapType>::startElement(const XMLCh* const /*uri*/,
544  const XMLCh* const /*local_name*/, const XMLCh* const qname,
545  const xercesc::Attributes& attributes)
546  {
547  OPENMS_PRECONDITION(nesting_level_ >= 0, "Nesting level needs to be zero or more")
548 
549  static bool init_static_members(false);
550  if (!init_static_members)
551  {
552  initStaticMembers_();
553  }
554 
555  String tag = sm_.convert(qname);
556  open_tags_.push_back(tag);
557  //std::cout << " -- Start -- "<< tag << " -- " << "\n";
558 
559  //Skip all tags until the the next scan
560  if (skip_spectrum_ && tag != "scan")
561  return;
562 
563  if (tag == "msRun")
564  {
565  Int count = 0;
566  optionalAttributeAsInt_(count, attributes, s_count_);
567  exp_->reserve(count);
568  logger_.startProgress(0, count, "loading mzXML file");
569  scan_count_ = 0;
570  data_processing_.clear();
571  //start and end time are xs:duration. This makes no sense => ignore them
572  }
573  else if (tag == "parentFile")
574  {
575  SourceFile sf;
576  sf.setNameOfFile(attributeAsString_(attributes, s_filename_));
577  sf.setFileType(attributeAsString_(attributes, s_filetype_));
578  sf.setChecksum(attributeAsString_(attributes, s_filesha1_), SourceFile::SHA1);
579  exp_->getSourceFiles().push_back(sf);
580  }
581  else if (tag == "software")
582  {
583  String& parent_tag = *(open_tags_.end() - 2);
584  if (parent_tag == "dataProcessing")
585  {
586  data_processing_.back().getSoftware().setVersion(attributeAsString_(attributes, s_version_));
587  data_processing_.back().getSoftware().setName(attributeAsString_(attributes, s_name_));
588  data_processing_.back().setMetaValue("#type", String(attributeAsString_(attributes, s_type_)));
589 
590  String time;
591  optionalAttributeAsString_(time, attributes, s_completiontime_);
592  data_processing_.back().setCompletionTime(asDateTime_(time));
593  }
594  else if (parent_tag == "msInstrument")
595  {
596  exp_->getInstrument().getSoftware().setVersion(attributeAsString_(attributes, s_version_));
597  exp_->getInstrument().getSoftware().setName(attributeAsString_(attributes, s_name_));
598  }
599  }
600  else if (tag == "peaks")
601  {
602  //precision
603  spectrum_data_.back().precision_ = "32";
604  optionalAttributeAsString_(spectrum_data_.back().precision_, attributes, s_precision_);
605  if (spectrum_data_.back().precision_ != "32" && spectrum_data_.back().precision_ != "64")
606  {
607  error(LOAD, String("Invalid precision '") + spectrum_data_.back().precision_ + "' in element 'peaks'");
608  }
609  //byte order
610  String byte_order = "network";
611  optionalAttributeAsString_(byte_order, attributes, s_byteorder_);
612  if (byte_order != "network")
613  {
614  error(LOAD, String("Invalid or missing byte order '") + byte_order + "' in element 'peaks'. Must be 'network'!");
615  }
616  //pair order
617  String pair_order = "m/z-int";
618  optionalAttributeAsString_(pair_order, attributes, s_pairorder_);
619  if (pair_order != "m/z-int")
620  {
621  error(LOAD, String("Invalid or missing pair order '") + pair_order + "' in element 'peaks'. Must be 'm/z-int'!");
622  }
623  //compressionType
624  spectrum_data_.back().compressionType_ = "none";
625  optionalAttributeAsString_(spectrum_data_.back().compressionType_, attributes, s_compressionType_);
626  if (spectrum_data_.back().compressionType_ != "none" && spectrum_data_.back().compressionType_ != "zlib")
627  {
628  error(LOAD, String("Invalid compression type ") + spectrum_data_.back().compressionType_ + "in elements 'peaks'. Must be 'none' or 'zlib'! ");
629  }
630  }
631  else if (tag == "precursorMz")
632  {
633  //add new precursor
634  spectrum_data_.back().spectrum.getPrecursors().push_back(Precursor());
635  //intensity
636  try
637  {
638  spectrum_data_.back().spectrum.getPrecursors().back().setIntensity(attributeAsDouble_(attributes, s_precursorintensity_));
639  }
640  catch (Exception::ParseError& /*e*/)
641  {
642  error(LOAD, "Mandatory attribute 'precursorIntensity' of tag 'precursorMz' not found! Setting precursor intensity to zero!");
643  }
644  //charge
645  Int charge = 0;
646  if (optionalAttributeAsInt_(charge, attributes, s_precursorcharge_))
647  {
648  spectrum_data_.back().spectrum.getPrecursors().back().setCharge(charge);
649  }
650  //window bounds (here only the width is stored in both fields - this is corrected when we parse the m/z position)
651  double window = 0.0;
652  if (optionalAttributeAsDouble_(window, attributes, s_windowwideness_))
653  {
654  spectrum_data_.back().spectrum.getPrecursors().back().setIsolationWindowLowerOffset(window);
655  }
656  }
657  else if (tag == "scan")
658  {
659  skip_spectrum_ = false;
660  nesting_level_++;
661 
662  if (options_.getMetadataOnly())
663  throw EndParsingSoftly(__FILE__, __LINE__, __PRETTY_FUNCTION__);
664 
665  // check if the scan is in the desired MS / RT range
666  UInt ms_level = attributeAsInt_(attributes, s_mslevel_);
667  if (ms_level == 0)
668  {
669  warning(LOAD, String("Invalid 'msLevel' attribute with value '0' in 'scan' element found. Assuming ms level 1!"));
670  ms_level = 1;
671  }
672 
673  //parse retention time and convert it from xs:duration to seconds
674  double retention_time = 0.0;
675  String time_string = "";
676  if (optionalAttributeAsString_(time_string, attributes, s_retentiontime_))
677  {
678  time_string = time_string.suffix('T');
679  //std::cout << "Initial trim: " << time_string << "\n";
680  if (time_string.has('H'))
681  {
682  retention_time += 3600 * asDouble_(time_string.prefix('H'));
683  time_string = time_string.suffix('H');
684  //std::cout << "After H: " << time_string << "\n";
685  }
686  if (time_string.has('M'))
687  {
688  retention_time += 60 * asDouble_(time_string.prefix('M'));
689  time_string = time_string.suffix('M');
690  //std::cout << "After M: " << time_string << "\n";
691  }
692  if (time_string.has('S'))
693  {
694  retention_time += asDouble_(time_string.prefix('S'));
695  time_string = time_string.suffix('S');
696  //std::cout << "After S: " << time_string << "\n";
697  }
698  }
699 
700  logger_.setProgress(scan_count_);
701 
702  if ((options_.hasRTRange() && !options_.getRTRange().encloses(DPosition<1>(retention_time)))
703  || (options_.hasMSLevels() && !options_.containsMSLevel(ms_level))
704  || options_.getSizeOnly())
705  {
706  // skip this tag
707  skip_spectrum_ = true;
708  ++scan_count_;
709  return;
710  }
711 
712  // Add a new spectrum, initialize and set MS level and RT
713  spectrum_data_.resize(spectrum_data_.size() + 1); // TODO !!
714  spectrum_data_.back().peak_count_ = 0;
715 
716  spectrum_data_.back().spectrum.setMSLevel(ms_level);
717  spectrum_data_.back().spectrum.setRT(retention_time);
718  spectrum_data_.back().spectrum.setNativeID(String("scan=") + attributeAsString_(attributes, s_num_));
719  //peak count == twice the scan size
720  spectrum_data_.back().peak_count_ = attributeAsInt_(attributes, s_peakscount_);
721  spectrum_data_.back().spectrum.reserve(spectrum_data_.back().peak_count_ / 2 + 1);
722  spectrum_data_.back().spectrum.setDataProcessing(data_processing_);
723 
724  //centroided, chargeDeconvoluted, deisotoped, collisionEnergy are ignored
725 
726  //other optional attributes
727  ScanWindow window;
728  optionalAttributeAsDouble_(window.begin, attributes, s_startmz_);
729  optionalAttributeAsDouble_(window.end, attributes, s_endmz_);
730  if (window.begin != 0.0 || window.end != 0.0)
731  {
732  spectrum_data_.back().spectrum.getInstrumentSettings().getScanWindows().push_back(window);
733  }
734 
735  String polarity = "any";
736  optionalAttributeAsString_(polarity, attributes, s_polarity_);
737  spectrum_data_.back().spectrum.getInstrumentSettings().setPolarity((IonSource::Polarity) cvStringToEnum_(0, polarity, "polarity"));
738 
739  // Filter string (see CV term MS:1000512 in mzML)
740  String filterLine = "";
741  optionalAttributeAsString_(filterLine, attributes, s_filterline_);
742  if (!filterLine.empty())
743  {
744  spectrum_data_.back().spectrum.setMetaValue("filter string", filterLine);
745  }
746 
747  String type = "";
748  optionalAttributeAsString_(type, attributes, s_scantype_);
749  if (type == "")
750  {
751  //unknown/unset => do nothing here => no warning in the end
752  }
753  else if (type == "zoom")
754  {
755  spectrum_data_.back().spectrum.getInstrumentSettings().setZoomScan(true);
756  spectrum_data_.back().spectrum.getInstrumentSettings().setScanMode(InstrumentSettings::MASSSPECTRUM);
757  }
758  else if (type == "Full")
759  {
760  if (ms_level > 1)
761  spectrum_data_.back().spectrum.getInstrumentSettings().setScanMode(InstrumentSettings::MSNSPECTRUM);
762  else
763  spectrum_data_.back().spectrum.getInstrumentSettings().setScanMode(InstrumentSettings::MASSSPECTRUM);
764  }
765  else if (type == "SIM")
766  {
767  spectrum_data_.back().spectrum.getInstrumentSettings().setScanMode(InstrumentSettings::SIM);
768  }
769  else if (type == "SRM" || type == "MRM")
770  {
771  spectrum_data_.back().spectrum.getInstrumentSettings().setScanMode(InstrumentSettings::SRM);
772  }
773  else if (type == "CRM")
774  {
775  spectrum_data_.back().spectrum.getInstrumentSettings().setScanMode(InstrumentSettings::CRM);
776  }
777  else if (type == "Q1")
778  {
779  spectrum_data_.back().spectrum.getInstrumentSettings().setScanMode(InstrumentSettings::MASSSPECTRUM);
780  }
781  else if (type == "Q3")
782  {
783  spectrum_data_.back().spectrum.getInstrumentSettings().setScanMode(InstrumentSettings::MASSSPECTRUM);
784  }
785  else if (type == "EMS") //Non-standard type: Enhanced MS (ABI - Sashimi converter)
786  {
787  spectrum_data_.back().spectrum.getInstrumentSettings().setScanMode(InstrumentSettings::MASSSPECTRUM);
788  }
789  else if (type == "EPI") //Non-standard type: Enhanced Product Ion (ABI - Sashimi converter)
790  {
791  spectrum_data_.back().spectrum.getInstrumentSettings().setScanMode(InstrumentSettings::MASSSPECTRUM);
792  spectrum_data_.back().spectrum.setMSLevel(2);
793  }
794  else if (type == "ER") // Non-standard type: Enhanced Resolution (ABI - Sashimi converter)
795  {
796  spectrum_data_.back().spectrum.getInstrumentSettings().setZoomScan(true);
797  spectrum_data_.back().spectrum.getInstrumentSettings().setScanMode(InstrumentSettings::MASSSPECTRUM);
798  }
799  else
800  {
801  spectrum_data_.back().spectrum.getInstrumentSettings().setScanMode(InstrumentSettings::MASSSPECTRUM);
802  warning(LOAD, String("Unknown scan mode '") + type + "'. Assuming full scan");
803  }
804 
805  ++scan_count_;
806  }
807  else if (tag == "operator")
808  {
809  exp_->getContacts().resize(1);
810  exp_->getContacts().back().setFirstName(attributeAsString_(attributes, s_first_));
811  exp_->getContacts().back().setLastName(attributeAsString_(attributes, s_last_));
812 
813  String tmp = "";
814  optionalAttributeAsString_(tmp, attributes, s_email_);
815  exp_->getContacts().back().setEmail(tmp);
816 
817  tmp = "";
818  optionalAttributeAsString_(tmp, attributes, s_phone_);
819  if (tmp != "")
820  {
821  exp_->getContacts().back().setMetaValue("#phone", tmp);
822  }
823 
824  tmp = "";
825  optionalAttributeAsString_(tmp, attributes, s_uri_);
826  exp_->getContacts().back().setURL(tmp);
827  }
828  else if (tag == "msManufacturer")
829  {
830  exp_->getInstrument().setVendor(attributeAsString_(attributes, s_value_));
831  }
832  else if (tag == "msModel")
833  {
834  exp_->getInstrument().setModel(attributeAsString_(attributes, s_value_));
835  }
836  else if (tag == "msIonisation")
837  {
838  exp_->getInstrument().getIonSources().resize(1);
839  exp_->getInstrument().getIonSources()[0].setIonizationMethod((IonSource::IonizationMethod) cvStringToEnum_(2, attributeAsString_(attributes, s_value_), "msIonization"));
840  }
841  else if (tag == "msMassAnalyzer")
842  {
843  exp_->getInstrument().getMassAnalyzers().resize(1);
844  exp_->getInstrument().getMassAnalyzers()[0].setType((MassAnalyzer::AnalyzerType) cvStringToEnum_(3, attributeAsString_(attributes, s_value_), "msMassAnalyzer"));
845  }
846  else if (tag == "msDetector")
847  {
848  exp_->getInstrument().getIonDetectors().resize(1);
849  exp_->getInstrument().getIonDetectors()[0].setType((IonDetector::Type) cvStringToEnum_(4, attributeAsString_(attributes, s_value_), "msDetector"));
850  }
851  else if (tag == "msResolution")
852  {
853  exp_->getInstrument().getMassAnalyzers()[0].setResolutionMethod((MassAnalyzer::ResolutionMethod) cvStringToEnum_(5, attributeAsString_(attributes, s_value_), "msResolution"));
854  }
855  else if (tag == "dataProcessing")
856  {
857  data_processing_.push_back(DataProcessing());
858 
859  String boolean = "";
860  optionalAttributeAsString_(boolean, attributes, s_deisotoped_);
861  if (boolean == "true" || boolean == "1")
862  {
863  data_processing_.back().getProcessingActions().insert(DataProcessing::DEISOTOPING);
864  }
865 
866  boolean = "";
867  optionalAttributeAsString_(boolean, attributes, s_chargedeconvoluted_);
868  if (boolean == "true" || boolean == "1")
869  {
870  data_processing_.back().getProcessingActions().insert(DataProcessing::CHARGE_DECONVOLUTION);
871  }
872 
873  double cutoff = 0.0;
874  optionalAttributeAsDouble_(cutoff, attributes, s_intensitycutoff_);
875  if (cutoff != 0.0)
876  {
877  data_processing_.back().setMetaValue("#intensity_cutoff", cutoff);
878  }
879 
880  boolean = "";
881  optionalAttributeAsString_(boolean, attributes, s_centroided_);
882  if (boolean == "true" || boolean == "1")
883  {
884  data_processing_.back().getProcessingActions().insert(DataProcessing::PEAK_PICKING);
885  }
886  }
887  else if (tag == "nameValue")
888  {
889  String name = "";
890  optionalAttributeAsString_(name, attributes, s_name_);
891  if (name == "")
892  return;
893 
894  String value = "";
895  optionalAttributeAsString_(value, attributes, s_value_);
896 
897  String& parent_tag = *(open_tags_.end() - 2);
898 
899  if (parent_tag == "msInstrument")
900  {
901  exp_->getInstrument().setMetaValue(name, value);
902  }
903  else if (parent_tag == "scan")
904  {
905  spectrum_data_.back().spectrum.setMetaValue(name, value);
906  }
907  else
908  {
909  std::cout << " Warning: Unexpected tag 'nameValue' in tag '" << parent_tag << "'" << "\n";
910  }
911  }
912  else if (tag == "processingOperation")
913  {
914  String name = "";
915  optionalAttributeAsString_(name, attributes, s_name_);
916  if (name == "")
917  return;
918 
919  String value = "";
920  optionalAttributeAsString_(value, attributes, s_value_);
921 
922  data_processing_.back().setMetaValue(name, value);
923  }
924 
925  //std::cout << " -- !Start -- " << "\n";
926  }
927 
928  template <typename MapType>
929  void MzXMLHandler<MapType>::endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname)
930  {
931  OPENMS_PRECONDITION(nesting_level_ >= 0, "Nesting level needs to be zero or more")
932 
933  //std::cout << " -- End -- " << sm_.convert(qname) << " -- " << "\n";
934 
935  static const XMLCh* s_mzxml = xercesc::XMLString::transcode("mzXML");
936  static const XMLCh* s_scan = xercesc::XMLString::transcode("scan");
937 
938  open_tags_.pop_back();
939 
940  if (equal_(qname, s_mzxml))
941  {
942  // Flush the remaining data
943  populateSpectraWithData_();
944 
945  // End of mzXML
946  logger_.endProgress();
947  }
948  else if (equal_(qname, s_scan))
949  {
950  // End of scan: go up one nesting level
951  // Check whether to populate spectra when on highest nesting level
952  nesting_level_--;
953  OPENMS_PRECONDITION(nesting_level_ >= 0, "Nesting level needs to be zero or more")
954 
955  if (nesting_level_ == 0 && spectrum_data_.size() >= options_.getMaxDataPoolSize())
956  {
957  populateSpectraWithData_();
958  }
959  }
960  //std::cout << " -- End -- " << "\n";
961  sm_.clear();
962  }
963 
// SAX callback for character data of the element that is currently open
// (open_tags_.back()).
//
// Nothing is done while skip_spectrum_ is set. Otherwise the innermost tag decides:
//  - "peaks":       the chunk is appended to spectrum_data_.back().char_rest_,
//                   but only if options_.getFillData() is true.
//  - "offset", "indexOffset", "sha1": content is ignored.
//  - "precursorMz": the text is parsed as a double and stored as m/z of the
//                   most recent precursor of the most recent spectrum; a non-zero
//                   lower isolation window offset is split evenly into lower and
//                   upper offset.
//  - "comment":     handled depending on the parent tag (see below).
//  - anything else: a LOAD warning is issued if the text is not only whitespace.
//
// @param chars  character data delivered by the parser
// @param length number of characters in chars; only the "peaks" branch uses it
964  template <typename MapType>
965  void MzXMLHandler<MapType>::characters(const XMLCh* const chars, const XMLSize_t length)
966  {
967  //Abort if this spectrum should be skipped
968  if (skip_spectrum_)
969  return;
970 
971  if (open_tags_.back() == "peaks")
972  {
973  //chars may be split to several chunks => concatenate them
974  if (options_.getFillData())
975  {
976  // Since we convert a Base64 string here, it can only contain plain ASCII
977  sm_.appendASCII(chars, length, spectrum_data_.back().char_rest_);
978  }
979  }
980  else if (open_tags_.back() == "offset" || open_tags_.back() == "indexOffset" || open_tags_.back() == "sha1")
981  {
// intentionally empty: the content of these elements is not used
982 
983  }
984  else if (open_tags_.back() == "precursorMz")
985  {
// NOTE(review): convert() is called without 'length' - presumably it expects a null-terminated buffer; confirm.
986  char* transcoded_chars = sm_.convert(chars);
987  double mz_pos = asDouble_(transcoded_chars);
988  //precursor m/z
989  spectrum_data_.back().spectrum.getPrecursors().back().setMZ(mz_pos);
990  //update window bounds - center them around the m/z pos
// The lower offset is read here as the full window width (presumably stored there by startElement - TODO confirm)
// and is replaced by half the width on each side.
991  double window_width = spectrum_data_.back().spectrum.getPrecursors().back().getIsolationWindowLowerOffset();
992  if (window_width != 0.0)
993  {
994  spectrum_data_.back().spectrum.getPrecursors().back().setIsolationWindowLowerOffset(0.5 * window_width);
995  spectrum_data_.back().spectrum.getPrecursors().back().setIsolationWindowUpperOffset(0.5 * window_width);
996  }
997  }
998  else if (open_tags_.back() == "comment")
999  {
1000  char* transcoded_chars = sm_.convert(chars);
// Second-innermost open tag, i.e. the element containing the <comment>.
// NOTE(review): assumes at least two tags are open; with fewer, end() - 2 is out of range.
1001  String parent_tag = *(open_tags_.end() - 2);
1002  //std::cout << "- Comment of parent " << parent_tag << "\n";
1003 
1004  if (parent_tag == "msInstrument")
1005  {
// stored under the "#comment" meta value of the instrument
1006  exp_->getInstrument().setMetaValue("#comment", String(transcoded_chars));
1007  }
1008  else if (parent_tag == "dataProcessing")
1009  {
1010  //this is currently ignored
1011  }
1012  else if (parent_tag == "scan")
1013  {
// NOTE(review): if the parser delivers the comment in several chunks, each call presumably replaces the previous text - confirm.
1014  spectrum_data_.back().spectrum.setComment(transcoded_chars);
1015  }
1016  else if (String(transcoded_chars).trim() != "")
1017  {
// NOTE(review): open_tags_.back() is "comment" in this branch, so the message names "comment" rather than parent_tag.
1018  warning(LOAD, String("Unhandled comment '") + transcoded_chars + "' in element '" + open_tags_.back() + "'");
1019  }
1020  }
1021  else
1022  {
// Any other element: whitespace-only content is silently accepted, everything else is reported.
1023  char* transcoded_chars = sm_.convert(chars);
1024  if (String(transcoded_chars).trim() != "")
1025  {
1026  warning(LOAD, String("Unhandled character content '") + transcoded_chars + "' in element '" + open_tags_.back() + "'");
1027  }
1028  }
1029  }
1030 
// Writes the experiment referenced by cexp_ to 'os' as an mzXML 2.1 document.
//
// Output order: XML header and <msRun>, <parentFile> entries, an optional
// <msInstrument>, <dataProcessing> (taken from the first spectrum), one <scan>
// per spectrum (nested according to MS level), then the closing tags and a
// dummy <indexOffset>. Progress is reported through logger_, and
// spec_write_counter_ is reset to 1 at the end.
//
// NOTE(review): this listing is missing several statements (the embedded
// numbering skips 1184, 1237, 1277, 1281, 1293, 1296-1298, 1309, 1313 and 1317);
// the gaps are marked below.
//
// @param os stream that receives the XML text
1031  template <typename MapType>
1032  void MzXMLHandler<MapType>::writeTo(std::ostream& os)
1033  {
1034  //determine how many spectra there are (count only those with peaks)
1035  UInt count_tmp_ = 0;
1036  for (Size s = 0; s < cexp_->size(); s++)
1037  {
1038  const SpectrumType& spec = (*cexp_)[s];
1039  if (spec.size() != 0)
1040  ++count_tmp_;
1041  }
// scanCount is never written as 0
1042  if (count_tmp_ == 0)
1043  ++count_tmp_;
// NOTE(review): the scan loop below emits a <scan> for every spectrum, including empty ones,
// so scanCount can be smaller than the number of <scan> elements written.
1044  logger_.startProgress(0, cexp_->size(), "storing mzXML file");
1045  os << "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n"
1046  << "<mzXML xmlns=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1\" "
1047  << "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
1048  << "xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 "
1049  << "http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd\">\n"
1050  << "\t<msRun scanCount=\"" << count_tmp_ << "\">\n";
1051 
1052  //----------------------------------------------------------------------------------------
1053  // parent files
1054  //----------------------------------------------------------------------------------------
// Without source files a single placeholder entry (empty name, all-zero SHA-1) is written.
1055  if (cexp_->getSourceFiles().empty())
1056  {
1057  os << "\t\t<parentFile fileName=\"\" fileType=\"processedData\" fileSha1=\"0000000000000000000000000000000000000000\"/>\n";
1058  }
1059  else
1060  {
1061  for (Size i = 0; i < cexp_->getSourceFiles().size(); ++i)
1062  {
1063  const SourceFile& sf = cexp_->getSourceFiles()[i];
// NOTE(review): the file name is written without writeXMLEscape().
1064  os << "\t\t<parentFile fileName=\"" << sf.getNameOfFile() << "\" fileType=\"";
1065  //file type is an enum in mzXML => search for 'raw' string
1066  String tmp_string = sf.getFileType();
1067  tmp_string.toLower();
1068  if (tmp_string.hasSubstring("raw"))
1069  {
1070  os << "RAWData";
1071  }
1072  else
1073  {
1074  os << "processedData";
1075  }
1076  //Sha1 checksum must have 40 characters => create a fake if it is unknown
1077  os << "\" fileSha1=\"";
// NOTE(review): tmp_string is not read again after this assignment.
1078  tmp_string = sf.getChecksum();
1079  if (sf.getChecksum().size() != 40 || sf.getChecksumType() != SourceFile::SHA1)
1080  {
1081  os << "0000000000000000000000000000000000000000";
1082  }
1083  else
1084  {
1085  os << sf.getChecksum();
1086  }
1087  os << "\"/>\n";
1088  }
1089  }
1090 
1091  //----------------------------------------------------------------------------------------
1092  //instrument
1093  //----------------------------------------------------------------------------------------
// Written only if the instrument differs from a default-constructed one or at least one contact exists.
// Only the first ion source, mass analyzer, ion detector and contact are used.
1094  if (cexp_->getInstrument() != Instrument() || cexp_->getContacts().size() != 0)
1095  {
1096  const Instrument& inst = cexp_->getInstrument();
// NOTE(review): vendor, model, software name/version and the operator fields below are written without writeXMLEscape().
1097  os << "\t\t<msInstrument>\n"
1098  << "\t\t\t<msManufacturer category=\"msManufacturer\" value=\"" << inst.getVendor() << "\"/>\n" << "\t\t\t<msModel category=\"msModel\" value=\"" << inst.getModel() << "\"/>\n";
// An enum value of 0 is treated like "not set" and yields an empty value attribute;
// otherwise the name is looked up in cv_terms_ (index 2 here).
1099  if (inst.getIonSources().empty() || !inst.getIonSources()[0].getIonizationMethod())
1100  {
1101  os << "\t\t\t<msIonisation category=\"msIonisation\" value=\"\"/>\n";
1102  }
1103  else
1104  {
1105  os << "\t\t\t<msIonisation category=\"msIonisation\" value=\"" << cv_terms_[2][inst.getIonSources()[0].getIonizationMethod()] << "\"/>\n";
1106  }
1107  const std::vector<MassAnalyzer>& analyzers = inst.getMassAnalyzers();
// NOTE(review): the guard tests getResolutionMethod() although the else-branch writes getType();
// this looks copied from the msResolution block further down - confirm whether getType() was intended.
1108  if (analyzers.empty() || !analyzers[0].getResolutionMethod())
1109  {
1110  os << "\t\t\t<msMassAnalyzer category=\"msMassAnalyzer\" value=\"\"/>\n";
1111  }
1112  else
1113  {
1114  os << "\t\t\t<msMassAnalyzer category=\"msMassAnalyzer\" value=\"" << cv_terms_[3][analyzers[0].getType()] << "\"/>\n";
1115  }
1116  if (inst.getIonDetectors().empty() || !inst.getIonDetectors()[0].getType())
1117  {
1118  os << "\t\t\t<msDetector category=\"msDetector\" value=\"\"/>\n";
1119  }
1120  else
1121  {
1122  os << "\t\t\t<msDetector category=\"msDetector\" value=\"" << cv_terms_[4][inst.getIonDetectors()[0].getType()] << "\"/>\n";
1123  }
1124  os << "\t\t\t<software type=\"acquisition\" name=\"" << inst.getSoftware().getName() << "\" version=\"" << inst.getSoftware().getVersion() << "\"/>\n";
1125  if (analyzers.empty() || !analyzers[0].getResolutionMethod())
1126  {
1127  os << "\t\t\t<msResolution category=\"msResolution\" value=\"\"/>\n";
1128  }
1129  else
1130  {
1131  os << "\t\t\t<msResolution category=\"msResolution\" value=\"" << cv_terms_[5][analyzers[0].getResolutionMethod()] << "\"/>\n";
1132  }
1133 
1134  if (cexp_->getContacts().size() > 0)
1135  {
1136  const ContactPerson& cont = cexp_->getContacts()[0];
1137 
// first/last name are always written; email, URI and phone only when present
1138  os << "\t\t\t<operator first=\"" << cont.getFirstName() << "\" last=\"" << cont.getLastName() << "\"";
1139 
1140  if (cont.getEmail() != "")
1141  {
1142  os << " email=\"" << cont.getEmail() << "\"";
1143  }
1144 
1145  if (cont.getURL() != "")
1146  {
1147  os << " URI=\"" << cont.getURL() << "\"";
1148  }
1149 
1150  if (cont.metaValueExists("#phone"))
1151  {
1152  os << " phone=\"" << writeXMLEscape(cont.getMetaValue("#phone").toString()) << "\"";
1153  }
1154 
1155  os << "/>\n";
1156  }
1157  writeUserParam_(os, inst, 3);
1158 
1159  if (inst.metaValueExists("#comment"))
1160  {
1161  os << "\t\t\t<comment>" << writeXMLEscape(inst.getMetaValue("#comment")) << "</comment>\n";
1162  }
1163 
1164  os << "\t\t</msInstrument>\n";
1165  }
1166 
1167  //----------------------------------------------------------------------------------------
1168  //data processing (the information of the first spectrum is assigned to the whole file)
1169  //----------------------------------------------------------------------------------------
// A placeholder <dataProcessing> with empty software name/version is written if there is
// no spectrum or the first spectrum has no data processing entries.
1170  if (cexp_->size() == 0 || (*cexp_)[0].getDataProcessing().empty())
1171  {
1172  os << "\t\t<dataProcessing>\n"
1173  << "\t\t\t<software type=\"processing\" name=\"\" version=\"\"/>\n"
1174  << "\t\t</dataProcessing>\n";
1175  }
1176  else
1177  {
1178  for (Size i = 0; i < (*cexp_)[0].getDataProcessing().size(); ++i)
1179  {
1180  const DataProcessing& data_processing = (*cexp_)[0].getDataProcessing()[i];
// count() on the action set yields 0 or 1, which is written directly as the attribute value
1181  os << "\t\t<dataProcessing deisotoped=\""
1182  << data_processing.getProcessingActions().count(DataProcessing::DEISOTOPING)
1183  << "\" chargeDeconvoluted=\""
// NOTE(review): listing line 1184 is missing here - presumably the value written for chargeDeconvoluted; restore from the original header.
1185  << "\" centroided=\""
1186  << data_processing.getProcessingActions().count(DataProcessing::PEAK_PICKING)
1187  << "\"";
1188  if (data_processing.metaValueExists("#intensity_cutoff"))
1189  {
1190  os << " intensityCutoff=\"" << writeXMLEscape(data_processing.getMetaValue("#intensity_cutoff").toString()) << "\"";
1191  }
1192  os << ">\n"
1193  << "\t\t\t<software type=\"";
// the "#type" meta value overrides the default software type "processing"
1194  if (data_processing.metaValueExists("#type"))
1195  {
1196  os << writeXMLEscape(data_processing.getMetaValue("#type").toString());
1197  }
1198  else
1199  {
1200  os << "processing";
1201  }
1202 
1203  os << "\" name=\"" << data_processing.getSoftware().getName()
1204  << "\" version=\"" << data_processing.getSoftware().getVersion();
1205 
// completionTime is only written if it differs from a default-constructed DateTime;
// the blank in the textual form is replaced by 'T'
1206  if (data_processing.getCompletionTime() != DateTime())
1207  {
1208  os << "\" completionTime=\"" << data_processing.getCompletionTime().get().substitute(' ', 'T');
1209  }
1210  os << "\"/>\n";
1211  writeUserParam_(os, data_processing, 3, "processingOperation");
1212 
1213  os << "\t\t</dataProcessing>\n";
1214  }
1215  }
1216 
1217  //check if the nativeID of all spectra are numbers or numbers prefixed with 'scan='
1218  //If not we need to renumber all spectra.
1219  bool all_numbers = true;
1220  bool all_empty = true;
1221  bool all_prefixed_numbers = true;
1222  for (Size s = 0; s < cexp_->size(); s++)
1223  {
1224  String native_id = (*cexp_)[s].getNativeID();
1225  if (!native_id.hasPrefix("scan="))
1226  {
1227  all_prefixed_numbers = false;
1228  }
1229  else
1230  {
// strip the 5-character prefix "scan=" before the numeric check
1231  native_id = native_id.substr(5);
1232  }
1233  try
1234  {
1235  native_id.toInt();
1236  }
// NOTE(review): listing line 1237 is missing here - presumably the catch clause for the failed conversion; restore from the original header.
1238  {
1239  all_numbers = false;
1240  all_prefixed_numbers = false;
1241  if (native_id != "")
1242  {
1243  all_empty = false;
1244  }
1245  }
1246  }
// NOTE(review): if some IDs carry the 'scan=' prefix and others are plain numbers, all_numbers stays true
// while all_prefixed_numbers is false; the scan loop below then calls toInt() on the unstripped
// prefixed IDs - confirm how toInt() behaves for such input.
1247  //If we need to renumber and the nativeIDs were not empty, warn the user
1248  if (!all_numbers && !all_empty)
1249  {
1250  warning(STORE, "Not all spectrum native IDs are numbers or correctly prefixed with 'scan='. The spectra are renumbered and the native IDs are lost!");
1251  }
1252 
1253  // write scans
// open_scans holds the MS levels of the <scan> elements that are currently open
1254  std::stack<UInt> open_scans;
1255  for (Size s = 0; s < cexp_->size(); s++)
1256  {
1257  logger_.setProgress(s);
1258  const SpectrumType& spec = (*cexp_)[s];
1259 
1260  UInt ms_level = spec.getMSLevel();
1261  open_scans.push(ms_level);
1262 
// scan number: 1-based position unless the native IDs are usable as numbers
1263  Size spectrum_id = s + 1;
1264  if (all_prefixed_numbers)
1265  {
1266  spectrum_id = spec.getNativeID().substr(5).toInt();
1267  }
1268  else if (all_numbers)
1269  {
1270  spectrum_id = spec.getNativeID().toInt();
1271  }
1272 
// indentation depth follows the MS level (ms_level + 1 tabs)
1273  os << String(ms_level + 1, '\t')
1274  << "<scan num=\"" << spectrum_id << "\" msLevel=\""
1275  << ms_level << "\" peaksCount=\""
1276  << spec.size() << "\" polarity=\"";
// NOTE(review): listing line 1277 is missing here - presumably the test for positive polarity.
1278  {
1279  os << "+";
1280  }
// NOTE(review): listing line 1281 is missing here - presumably the test for negative polarity.
1282  {
1283  os << "-";
1284  }
1285  else
1286  {
1287  os << "any";
1288  }
1289 
1290  //scan type
1291  switch (spec.getInstrumentSettings().getScanMode())
1292  {
// NOTE(review): listing line 1293 is missing here - presumably a case label for which no scanType attribute is written.
1294  break;
1295 
// NOTE(review): listing lines 1296-1298 are missing here - presumably the case labels leading into the zoom/Full branch.
1299  if (spec.getInstrumentSettings().getZoomScan())
1300  {
1301  os << "\" scanType=\"zoom";
1302  }
1303  else
1304  {
1305  os << "\" scanType=\"Full";
1306  }
1307  break;
1308 
// NOTE(review): listing line 1309 is missing here - presumably the case label for SIM scans.
1310  os << "\" scanType=\"SIM";
1311  break;
1312 
// NOTE(review): listing line 1313 is missing here - presumably the case label for SRM scans.
1314  os << "\" scanType=\"SRM";
1315  break;
1316 
// NOTE(review): listing line 1317 is missing here - presumably the case label for CRM scans.
1318  os << "\" scanType=\"CRM";
1319  break;
1320 
// every other scan mode is written as "Full" and reported with a STORE warning
1321  default:
1322  os << "\" scanType=\"Full";
1323  warning(STORE, String("Scan type '") + InstrumentSettings::NamesOfScanMode[spec.getInstrumentSettings().getScanMode()] + "' not supported by mzXML. Using 'Full' scan mode!");
1324  }
1325 
1326  // filter line
1327  if (spec.metaValueExists("filter string") )
1328  {
1329  os << "\" filterLine=\"";
1330  os << writeXMLEscape ( (String)spec.getMetaValue("filter string") );
1331  }
1332 
1333  // base peak mz (used by some programs like MAVEN), according to xsd:
1334  // "m/z of the base peak (most intense peak)"
1335  os << "\" basePeakMz=\"";
// linear scan for the peak with the highest intensity; stays 0 if the spectrum is empty
// or no intensity is greater than 0
1336  double basePeakInt = 0;
1337  double basePeakMz = 0;
1338  for (Size j = 0; j < spec.size(); j++)
1339  {
1340  if (spec[j].getIntensity() > basePeakInt)
1341  {
1342  basePeakInt = spec[j].getIntensity();
1343  basePeakMz = spec[j].getMZ();
1344  }
1345  }
1346  os << basePeakMz;
1347 
1348  // retention time
// written as "PT<seconds>S", with a leading '-' for negative retention times
1349  os << "\" retentionTime=\"";
1350  if (spec.getRT() < 0)
1351  os << "-";
1352  os << "PT" << std::fabs(spec.getRT()) << "S\"";
1353  if (!spec.getInstrumentSettings().getScanWindows().empty())
1354  {
1355  os << " startMz=\"" << spec.getInstrumentSettings().getScanWindows()[0].begin << "\" endMz=\"" << spec.getInstrumentSettings().getScanWindows()[0].end << "\"";
1356  }
1357  if (spec.getInstrumentSettings().getScanWindows().size() > 1)
1358  {
1359  warning(STORE, "The MzXML format can store only one scan window for each scan. Only the first one is stored!");
1360  }
1361 
1362  // end of "scan" attributes
1363  os << ">\n";
1364 
1365 
// one <precursorMz> element per precursor; charge and window width are optional attributes
1366  for (Size i = 0; i < spec.getPrecursors().size(); ++i)
1367  {
1368  const Precursor& precursor = spec.getPrecursors()[i];
1369  //intensity
1370  os << String(ms_level + 2, '\t') << "<precursorMz precursorIntensity=\"" << precursor.getIntensity();
1371  //charge
1372  if (precursor.getCharge() != 0)
1373  os << "\" precursorCharge=\"" << precursor.getCharge();
1374  //window size
// windowWideness is the sum of lower and upper isolation window offset
1375  if (precursor.getIsolationWindowLowerOffset() + precursor.getIsolationWindowUpperOffset() > 0.0)
1376  os << "\" windowWideness=\"" << (precursor.getIsolationWindowUpperOffset() + precursor.getIsolationWindowLowerOffset());
1377  //m/z
1378  os << "\">" << precursor.getMZ() << "</precursorMz>\n";
1379  }
1380 
1381  if (!spec.empty())
1382  {
1383  os << String(ms_level + 2, '\t') << "<peaks precision=\"32\"" << " byteOrder=\"network\" pairOrder=\"m/z-int\">";
1384 
1385  //std::cout << "Writing scan " << s << "\n";
// peaks are flattened into alternating (m/z, intensity) floats and Base64-encoded in big-endian byte order
1386  std::vector<float> tmp;
1387  for (Size i = 0; i < spec.size(); i++)
1388  {
1389  tmp.push_back(spec[i].getMZ());
1390  tmp.push_back(spec[i].getIntensity());
1391  }
1392 
1393  String encoded;
1394  decoder_.encode(tmp, Base64::BYTEORDER_BIGENDIAN, encoded);
1395  os << encoded << "</peaks>\n";
1396  }
1397  else
1398  {
// an empty spectrum still gets a <peaks> element, marked xsi:nil
1399  os << String(ms_level + 2, '\t') << "<peaks precision=\"32\"" << " byteOrder=\"network\" pairOrder=\"m/z-int\" xsi:nil=\"true\"/>\n";
1400  }
1401 
1402  writeUserParam_(os, spec, ms_level + 2);
1403  if (spec.getComment() != "")
1404  {
// NOTE(review): the spectrum comment is written without writeXMLEscape(), unlike the instrument comment above.
1405  os << String(ms_level + 2, '\t') << "<comment>" << spec.getComment() << "</comment>\n";
1406  }
1407 
1408  //check MS level of next scan and close scans (scans can be nested)
// next_ms_level stays 0 for the last spectrum
1409  UInt next_ms_level = 0;
1410  if (s < cexp_->size() - 1)
1411  {
1412  next_ms_level = ((*cexp_)[s + 1]).getMSLevel();
1413  }
1414  //std::cout << "scan: " << s << " this: " << ms_level << " next: " << next_ms_level << "\n";
// A following scan of higher level is nested inside this one, so nothing is closed.
// Otherwise up to (ms_level - next_ms_level + 1) scans are closed, limited by what is still open.
1415  if (next_ms_level <= ms_level)
1416  {
1417  for (Size i = 0; i <= ms_level - next_ms_level && !open_scans.empty(); ++i)
1418  {
1419  os << String(ms_level - i + 1, '\t') << "</scan>\n";
1420  open_scans.pop();
1421  }
1422  }
1423  }
1424 
// no scan index is written; indexOffset is emitted as 0
1425  os << "\t</msRun>\n"
1426  << "\t<indexOffset>0</indexOffset>\n"
1427  << "</mzXML>\n";
1428 
1429  logger_.endProgress();
1430  spec_write_counter_ = 1;
1431  }
1432 
1433  } // namespace Internal
1434 
1435 } // namespace OpenMS
1436 
1437 #endif
Deisotoping.
Definition: DataProcessing.h:62
const DataValue & getMetaValue(const String &name) const
returns the value corresponding to a string
Description of the applied preprocessing steps.
Definition: DataProcessing.h:51
ChecksumType getChecksumType() const
returns the checksum type
void writeUserParam_(std::ostream &os, const MetaInfoInterface &meta, int indent=4, String tag="nameValue")
write metaInfo to xml (usually in nameValue-tag)
Definition: MzXMLHandler.h:229
Big endian type.
Definition: Base64.h:78
Description of a MS instrument.
Definition: Instrument.h:64
bool hasMZRange() const
returns true if an MZ range has been set
double getIsolationWindowLowerOffset() const
Returns the lower offset from the target m/z.
const String & getNameOfFile() const
returns the file name
const Software & getSoftware() const
returns a const reference to the instrument software
static const XMLCh * s_count_
Definition: MzXMLHandler.h:388
A more convenient string class.
Definition: String.h:57
Precursor meta information.
Definition: Precursor.h:56
Class to encode and decode Base64.
Definition: Base64.h:64
Exception that is thrown if the parsing is ended by some event (e.g. if only a prefix of the XML file...
Definition: XMLHandler.h:104
const std::vector< IonSource > & getIonSources() const
returns a const reference to the ion source list
bool getAlwaysAppendData() const
returns whether or not to always append the data to the given map (even if a consumer is given) ...
static const XMLCh * s_peakscount_
Definition: MzXMLHandler.h:404
bool encloses(const PositionType &position) const
Checks whether this range contains a certain point.
Definition: DRange.h:172
UInt getMSLevel() const
Returns the MS level.
Definition: MSSpectrum.h:259
static const XMLCh * s_name_
Definition: MzXMLHandler.h:390
bool hasIntensityRange() const
returns true if an intensity range has been set
IntensityType getIntensity() const
Definition: Peak1D.h:109
static const XMLCh * s_first_
Definition: MzXMLHandler.h:411
static const XMLCh * s_phone_
Definition: MzXMLHandler.h:413
static const XMLCh * s_centroided_
Definition: MzXMLHandler.h:418
static const XMLCh * s_filterline_
Definition: MzXMLHandler.h:407
UInt spec_write_counter_
spectrum counter (spectra without peaks are not written)
Definition: MzXMLHandler.h:217
void setMSDataConsumer(Interfaces::IMSDataConsumer< MapType > *consumer)
Set the IMSDataConsumer consumer which will consume the read data.
Definition: MzXMLHandler.h:129
const std::set< ProcessingAction > & getProcessingActions() const
returns a const reference to the applied processing actions
static const XMLCh * s_precursorcharge_
Definition: MzXMLHandler.h:401
static const XMLCh * s_filesha1_
Definition: MzXMLHandler.h:394
UInt peak_count_
Definition: MzXMLHandler.h:201
const DRange< 1 > & getIntensityRange() const
returns the intensity range
Contact person information.
Definition: ContactPerson.h:50
const String & getComment() const
returns the free-text comment
#define OPENMS_PRECONDITION(condition, message)
Precondition macro.
Definition: openms/include/OpenMS/CONCEPT/Macros.h:107
Negative polarity.
Definition: IonSource.h:144
const std::vector< MassAnalyzer > & getMassAnalyzers() const
returns a const reference to the mass analyzer list
String toString() const
Conversion to String.
void setFileType(const String &file_type)
sets the file type
XML handlers for MzXMLFile.
Definition: MzXMLHandler.h:62
CoordinateType getMZ() const
Non-mutable access to m/z.
Definition: Peak1D.h:114
Positive polarity.
Definition: IonSource.h:143
Definition: MassAnalyzer.h:70
Int getCharge() const
Non-mutable access to the charge.
virtual void startElement(const XMLCh *const uri, const XMLCh *const local_name, const XMLCh *const qname, const xercesc::Attributes &attributes)
Parsing method for opening tags.
Definition: MzXMLHandler.h:543
Base class for XML handlers.
Definition: XMLHandler.h:99
bool getSortSpectraByMZ() const
gets whether or not peaks in spectra should be sorted
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:128
const std::vector< IonDetector > & getIonDetectors() const
returns a const reference to the ion detector list
Description of a file location, used to store the origin of (meta) data.
Definition: SourceFile.h:47
static const XMLCh * s_retentiontime_
Definition: MzXMLHandler.h:408
double begin
Begin of the window.
Definition: ScanWindow.h:62
XMLHandler()
Not implemented.
bool has(Byte byte) const
true if String contains the byte, false otherwise
MS2+ mass spectrum, is a "mass spectrum".
Definition: InstrumentSettings.h:58
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:120
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:111
Type
Detector type.
Definition: IonDetector.h:52
Selected reaction monitoring scan Synonyms: 'Multiple reaction monitoring scan', 'SRM scan', 'MRM scan'.
Definition: InstrumentSettings.h:60
void setChecksum(const String &checksum, ChecksumType type)
sets the file's checksum
static const XMLCh * s_scantype_
Definition: MzXMLHandler.h:406
static String writeXMLEscape(const String &to_escape)
Escapes a string and returns the escaped string.
Definition: XMLHandler.h:169
const MapType * cexp_
map pointer for writing
Definition: MzXMLHandler.h:182
double getIsolationWindowUpperOffset() const
Returns the upper offset from the target m/z.
bool getZoomScan() const
return if this scan is a zoom (enhanced resolution) scan
ScanMode getScanMode() const
returns the scan mode
Definition: IonDetector.h:76
MSExperiment< Peak1D > MapType
Definition: PeakPickerIterative.cpp:87
void get(UInt &month, UInt &day, UInt &year, UInt &hour, UInt &minute, UInt &second) const
Fills the arguments with the date and the time.
ResolutionMethod
resolution method
Definition: MassAnalyzer.h:80
static const XMLCh * s_polarity_
Definition: MzXMLHandler.h:405
virtual ~MzXMLHandler()
Destructor.
Definition: MzXMLHandler.h:101
String precision_
Definition: MzXMLHandler.h:202
Int toInt() const
Conversion to int.
void init_()
initialize members (call from C'tor)
Definition: MzXMLHandler.h:136
const std::vector< ScanWindow > & getScanWindows() const
returns a const reference to the m/z scan windows
static const XMLCh * s_byteorder_
Definition: MzXMLHandler.h:397
Interfaces::IMSDataConsumer< MapType > * consumer_
Consumer class to work on spectra.
Definition: MzXMLHandler.h:220
static const XMLCh * s_completiontime_
Definition: MzXMLHandler.h:395
UInt getScanCount()
Gets the scan count.
Definition: MzXMLHandler.h:123
String & toLower()
Converts the string to lowercase.
bool hasSubstring(const String &string) const
true if String contains the string, false otherwise
PeakFileOptions options_
Options for loading and storing.
Definition: MzXMLHandler.h:185
static const XMLCh * s_num_
Definition: MzXMLHandler.h:416
const String & getModel() const
returns the instrument model
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:55
Scan window description.
Definition: ScanWindow.h:47
double getRT() const
Definition: MSSpectrum.h:243
const String & getVendor() const
returns the instrument vendor
void setOptions(const PeakFileOptions &options)
Sets the options.
Definition: MzXMLHandler.h:117
static const XMLCh * s_value_
Definition: MzXMLHandler.h:387
full scan mass spectrum, is a "mass spectrum" Synonyms: 'full spectrum', 'Q1 spectrum', 'Q3 spectrum', 'Single-Stage Mass Spectrometry'
Definition: InstrumentSettings.h:57
String file_
File name.
Definition: XMLHandler.h:187
void getKeys(std::vector< String > &keys) const
fills the given vector with a list of all keys for which a value is set
const Software & getSoftware() const
returns a const reference to the software used for processing
IonizationMethod
ionization method
Definition: IonSource.h:80
Int nesting_level_
Definition: MzXMLHandler.h:190
const String & getName() const
returns the name of the software
static const XMLCh * s_pairorder_
Definition: MzXMLHandler.h:398
Selected ion monitoring scan Synonyms: 'Multiple ion monitoring scan', 'SIM scan', 'MIM scan'.
Definition: InstrumentSettings.h:59
void populateSpectraWithData_()
Populate all spectra on the stack with data from input.
Definition: MzXMLHandler.h:326
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition: MetaInfoInterface.h:56
std::vector< DataProcessing > data_processing_
data processing auxiliary variable
Definition: MzXMLHandler.h:244
Invalid conversion exception.
Definition: Exception.h:363
void setNameOfFile(const String &name_of_file)
sets the file name
double end
End of the window.
Definition: ScanWindow.h:64
PeakType PeakType
Peak type.
Definition: MSSpectrum.h:105
String compressionType_
Definition: MzXMLHandler.h:203
static const XMLCh * s_version_
Definition: MzXMLHandler.h:391
std::vector< SpectrumData > spectrum_data_
Vector of spectrum data stored for later parallel processing.
Definition: MzXMLHandler.h:210
const String & getFileType() const
returns the file type
bool getFillData() const
returns whether to fill the actual data into the container (spectrum/chromatogram) ...
MapType::PeakType PeakType
Peak type.
Definition: MzXMLHandler.h:175
Secure Hash Algorithm-1.
Definition: SourceFile.h:55
const String & getURL() const
returns the URL
static const XMLCh * s_last_
Definition: MzXMLHandler.h:412
Consecutive reaction monitoring scan Synonyms: 'CRM scan'.
Definition: InstrumentSettings.h:61
SpectrumType spectrum
Definition: MzXMLHandler.h:205
const String & getFirstName() const
returns the first name of the person
UInt scan_count_
Consumer class to work on spectra.
Definition: MzXMLHandler.h:223
const String & getVersion() const
returns the software version
const DateTime & getCompletionTime() const
returns the time of completion of the processing
IonSource::Polarity getPolarity() const
returns the polarity
MzXMLHandler(const MapType &exp, const String &filename, const String &version, const ProgressLogger &logger)
Constructor for a write-only handler.
Definition: MzXMLHandler.h:85
String & removeWhitespaces()
removes whitespaces (space, tab, line feed, carriage return)
MSSpectrum< PeakType > SpectrumType
Spectrum type.
Definition: MzXMLHandler.h:177
The interface of a consumer of spectra and chromatograms.
Definition: IMSDataConsumer.h:68
Unknown scan method.
Definition: InstrumentSettings.h:55
String prefix(SizeType length) const
returns the prefix of length length
const String & getChecksum() const
returns the file's checksum
DateTime Class.
Definition: DateTime.h:55
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
String char_rest_
Definition: MzXMLHandler.h:204
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
bool skip_spectrum_
Flag that indicates whether this spectrum should be skipped (due to options)
Definition: MzXMLHandler.h:214
Data necessary to generate a single spectrum.
Definition: MzXMLHandler.h:199
const InstrumentSettings & getInstrumentSettings() const
returns a const reference to the instrument settings of the current spectrum
void doPopulateSpectraWithData_(SpectrumData &spectrum_data)
Fill a single spectrum with data from input.
Definition: MzXMLHandler.h:253
Peak picking (conversion from raw to peak data)
Definition: DataProcessing.h:67
static const XMLCh * s_startmz_
Definition: MzXMLHandler.h:409
Base64 decoder_
Definition: MzXMLHandler.h:189
static const XMLCh * s_type_
Definition: MzXMLHandler.h:389
general spectrum type
Definition: InstrumentSettings.h:56
const DRange< 1 > & getMZRange() const
returns the MZ range
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
Options for loading files containing peak data.
Definition: PeakFileOptions.h:48
std::vector< std::vector< String > > cv_terms_
Array of CV term lists (one sublist denotes one term and its children)
Definition: XMLHandler.h:220
AnalyzerType
analyzer type
Definition: MassAnalyzer.h:53
virtual void endElement(const XMLCh *const uri, const XMLCh *const local_name, const XMLCh *const qname)
Parsing method for closing tags.
Definition: MzXMLHandler.h:929
const ProgressLogger & logger_
Progress logging class.
Definition: MzXMLHandler.h:226
static const std::string NamesOfScanMode[SIZE_OF_SCANMODE]
Names of scan modes.
Definition: InstrumentSettings.h:74
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software. ...
bool metaValueExists(const String &name) const
returns if this MetaInfo is set
static const XMLCh * s_filename_
Definition: MzXMLHandler.h:392
virtual void characters(const XMLCh *const chars, const XMLSize_t length)
Parsing method for character data.
Definition: MzXMLHandler.h:965
String suffix(SizeType length) const
returns the suffix of length length
bool skip_data
Definition: MzXMLHandler.h:206
void initStaticMembers_()
Definition: MzXMLHandler.h:423
const String & getLastName() const
returns the last name of the person
int Int
Signed integer type.
Definition: Types.h:96
MapType * exp_
map pointer for reading
Definition: MzXMLHandler.h:180
static const XMLCh * s_endmz_
Definition: MzXMLHandler.h:410
static const XMLCh * s_deisotoped_
Definition: MzXMLHandler.h:419
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
const String & getEmail() const
returns the email address
static const XMLCh * s_uri_
Definition: MzXMLHandler.h:415
Charge deconvolution.
Definition: DataProcessing.h:61
MzXMLHandler()
Not implemented.
void writeTo(std::ostream &os)
Write the contents to a stream.
Definition: MzXMLHandler.h:1032
static const XMLCh * s_filetype_
Definition: MzXMLHandler.h:393
static const XMLCh * s_intensitycutoff_
Definition: MzXMLHandler.h:417
void decode(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out, bool zlib_compression=false)
Decodes a Base64 string to a vector of floating point numbers.
Definition: Base64.h:334
static const XMLCh * s_mslevel_
Definition: MzXMLHandler.h:403
static const XMLCh * s_email_
Definition: MzXMLHandler.h:414
Polarity
Polarity of the ion source.
Definition: IonSource.h:140
static const XMLCh * s_precursorintensity_
Definition: MzXMLHandler.h:400
static const XMLCh * s_compressionType_
Definition: MzXMLHandler.h:399
bool hasPrefix(const String &string) const
true if String begins with string, false otherwise
Parse Error exception.
Definition: Exception.h:608
static const XMLCh * s_windowwideness_
Definition: MzXMLHandler.h:402
static const XMLCh * s_chargedeconvoluted_
Definition: MzXMLHandler.h:420
static const XMLCh * s_precision_
Definition: MzXMLHandler.h:396

OpenMS / TOPP release 2.0.0 Documentation generated on Wed Mar 30 2016 16:18:40 using doxygen 1.8.5