Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
SignalToNoiseEstimatorMedian.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2015.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 //
35 
36 #ifndef OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H
37 #define OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H
38 
39 
44 #include <vector>
45 
46 namespace OpenMS
47 {
71  template <typename Container = MSSpectrum<> >
73  public SignalToNoiseEstimator<Container>
74  {
75 
76 public:
77 
80 
87 
90 
92 
95  {
96  //set the name for DefaultParamHandler error messages
97  this->setName("SignalToNoiseEstimatorMedian");
98 
99  defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)." \
100  " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!" \
101  " All intensities EQUAL/ABOVE 'max_intensity' will be added to the LAST histogram bin." \
102  " If you choose 'max_intensity' too small, the noise estimate might be too small as well. " \
103  " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime)." \
104  " In general, the Median-S/N estimator is more robust to a manual max_intensity than the MeanIterative-S/N.", ListUtils::create<String>("advanced"));
105  defaults_.setMinInt("max_intensity", -1);
106 
107  defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", ListUtils::create<String>("advanced"));
108  defaults_.setMinFloat("auto_max_stdev_factor", 0.0);
109  defaults_.setMaxFloat("auto_max_stdev_factor", 999.0);
110 
111  defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", ListUtils::create<String>("advanced"));
112  defaults_.setMinInt("auto_max_percentile", 0);
113  defaults_.setMaxInt("auto_max_percentile", 100);
114 
115  defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", ListUtils::create<String>("advanced"));
116  defaults_.setMinInt("auto_mode", -1);
117  defaults_.setMaxInt("auto_mode", 1);
118 
119  defaults_.setValue("win_len", 200.0, "window length in Thomson");
120  defaults_.setMinFloat("win_len", 1.0);
121 
122  defaults_.setValue("bin_count", 30, "number of bins for intensity values");
123  defaults_.setMinInt("bin_count", 3);
124 
125  defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)");
126  defaults_.setMinInt("min_required_elements", 1);
127 
128  defaults_.setValue("noise_for_empty_window", std::pow(10.0, 20), "noise value used for sparse windows", ListUtils::create<String>("advanced"));
129 
130 
132  }
133 
136  SignalToNoiseEstimator<Container>(source)
137  {
138  updateMembers_();
139  }
140 
146  {
147  if (&source == this) return *this;
148 
150  updateMembers_();
151  return *this;
152  }
153 
155 
156 
159  {}
160 
161 
162 protected:
163 
164 
170  void computeSTN_(const PeakIterator & scan_first_, const PeakIterator & scan_last_)
171  {
172  // reset counter for sparse windows
173  double sparse_window_percent = 0;
174  // reset counter for histogram overflow
175  double histogram_oob_percent = 0;
176 
177  // reset the results
178  stn_estimates_.clear();
179 
180  // maximal range of histogram needs to be calculated first
181  if (auto_mode_ == AUTOMAXBYSTDEV)
182  {
183  // use MEAN+auto_max_intensity_*STDEV as threshold
184  GaussianEstimate gauss_global = SignalToNoiseEstimator<Container>::estimate_(scan_first_, scan_last_);
185  max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance) * auto_max_stdev_Factor_;
186  }
187  else if (auto_mode_ == AUTOMAXBYPERCENT)
188  {
189  // get value at "auto_max_percentile_"th percentile
190  // we use a histogram approach here as well.
191  if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
192  {
194  throw Exception::InvalidValue(__FILE__,
195  __LINE__,
196  __PRETTY_FUNCTION__,
197  "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!",
198  s);
199  }
200 
201  std::vector<int> histogram_auto(100, 0);
202 
203  // find maximum of current scan
204  int size = 0;
205  typename PeakType::IntensityType maxInt = 0;
206  PeakIterator run = scan_first_;
207  while (run != scan_last_)
208  {
209  maxInt = std::max(maxInt, (*run).getIntensity());
210  ++size;
211  ++run;
212  }
213 
214  double bin_size = maxInt / 100;
215 
216  // fill histogram
217  run = scan_first_;
218  while (run != scan_last_)
219  {
220  ++histogram_auto[(int) (((*run).getIntensity() - 1) / bin_size)];
221  ++run;
222  }
223 
224  // add up element counts in histogram until ?th percentile is reached
225  int elements_below_percentile = (int) (auto_max_percentile_ * size / 100);
226  int elements_seen = 0;
227  int i = -1;
228  run = scan_first_;
229 
230  while (run != scan_last_ && elements_seen < elements_below_percentile)
231  {
232  ++i;
233  elements_seen += histogram_auto[i];
234  ++run;
235  }
236 
237  max_intensity_ = (((double)i) + 0.5) * bin_size;
238  }
239  else //if (auto_mode_ == MANUAL)
240  {
241  if (max_intensity_ <= 0)
242  {
244  throw Exception::InvalidValue(__FILE__,
245  __LINE__,
246  __PRETTY_FUNCTION__,
247  "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!",
248  s);
249  }
250  }
251 
252  if (max_intensity_ < 0)
253  {
254  std::cerr << "TODO SignalToNoiseEstimatorMedian: the max_intensity_ value should be positive! " << max_intensity_ << std::endl;
255  return;
256  }
257 
258  PeakIterator window_pos_center = scan_first_;
259  PeakIterator window_pos_borderleft = scan_first_;
260  PeakIterator window_pos_borderright = scan_first_;
261 
262  double window_half_size = win_len_ / 2;
263  double bin_size = std::max(1.0, max_intensity_ / bin_count_); // at least size of 1 for intensity bins
264  int bin_count_minus_1 = bin_count_ - 1;
265 
266  std::vector<int> histogram(bin_count_, 0);
267  std::vector<double> bin_value(bin_count_, 0);
268  // calculate average intensity that is represented by a bin
269  for (int bin = 0; bin < bin_count_; bin++)
270  {
271  histogram[bin] = 0;
272  bin_value[bin] = (bin + 0.5) * bin_size;
273  }
274  // bin in which a datapoint would fall
275  int to_bin = 0;
276 
277  // index of bin where the median is located
278  int median_bin = 0;
279  // additive number of elements from left to x in histogram
280  int element_inc_count = 0;
281 
282  // tracks elements in current window, which may vary because of unevenly spaced data
283  int elements_in_window = 0;
284  // number of windows
285  int window_count = 0;
286 
287  // number of elements where we find the median
288  int element_in_window_half = 0;
289 
290  double noise; // noise value of a datapoint
291 
292  // determine how many elements we need to estimate (for progress estimation)
293  int windows_overall = 0;
294  PeakIterator run = scan_first_;
295  while (run != scan_last_)
296  {
297  ++windows_overall;
298  ++run;
299  }
300  SignalToNoiseEstimator<Container>::startProgress(0, windows_overall, "noise estimation of data");
301 
302  // MAIN LOOP
303  while (window_pos_center != scan_last_)
304  {
305 
306  // erase all elements from histogram that will leave the window on the LEFT side
307  while ((*window_pos_borderleft).getMZ() < (*window_pos_center).getMZ() - window_half_size)
308  {
309  to_bin = std::max(std::min<int>((int)((*window_pos_borderleft).getIntensity() / bin_size), bin_count_minus_1), 0);
310  --histogram[to_bin];
311  --elements_in_window;
312  ++window_pos_borderleft;
313  }
314 
315  // add all elements to histogram that will enter the window on the RIGHT side
316  while ((window_pos_borderright != scan_last_)
317  && ((*window_pos_borderright).getMZ() <= (*window_pos_center).getMZ() + window_half_size))
318  {
319  //std::cerr << (*window_pos_borderright).getIntensity() << " " << bin_size << " " << bin_count_minus_1 << std::endl;
320  to_bin = std::max(std::min<int>((int)((*window_pos_borderright).getIntensity() / bin_size), bin_count_minus_1), 0);
321  ++histogram[to_bin];
322  ++elements_in_window;
323  ++window_pos_borderright;
324  }
325 
326  if (elements_in_window < min_required_elements_)
327  {
328  noise = noise_for_empty_window_;
329  ++sparse_window_percent;
330  }
331  else
332  {
333  // find bin i where ceil[elements_in_window/2] <= sum_c(0..i){ histogram[c] }
334  median_bin = -1;
335  element_inc_count = 0;
336  element_in_window_half = (elements_in_window + 1) / 2;
337  while (median_bin < bin_count_minus_1 && element_inc_count < element_in_window_half)
338  {
339  ++median_bin;
340  element_inc_count += histogram[median_bin];
341  }
342 
343  // increase the error count
344  if (median_bin == bin_count_minus_1) {++histogram_oob_percent; }
345 
346  // just avoid division by 0
347  noise = std::max(1.0, bin_value[median_bin]);
348  }
349 
350  // store result
351  stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise;
352 
353 
354  // advance the window center by one datapoint
355  ++window_pos_center;
356  ++window_count;
357  // update progress
359 
360  } // end while
361 
363 
364  sparse_window_percent = sparse_window_percent * 100 / window_count;
365  histogram_oob_percent = histogram_oob_percent * 100 / window_count;
366 
367  // warn if percentage of sparse windows is above 20%
368  if (sparse_window_percent > 20)
369  {
370  LOG_WARN << "WARNING in SignalToNoiseEstimatorMedian: "
371  << sparse_window_percent
372  << "% of all windows were sparse. You should consider increasing 'win_len' or decreasing 'min_required_elements'"
373  << std::endl;
374  }
375 
376  // warn if percentage of possibly wrong median estimates is above 1%
377  if (histogram_oob_percent > 1)
378  {
379  LOG_WARN << "WARNING in SignalToNoiseEstimatorMedian: "
380  << histogram_oob_percent
381  << "% of all Signal-to-Noise estimates are too high, because the median was found in the rightmost histogram-bin. "
382  << "You should consider increasing 'max_intensity' (and maybe 'bin_count' with it, to keep bin width reasonable)"
383  << std::endl;
384  }
385 
386  } // end of shiftWindow_
387 
390  {
391  max_intensity_ = (double)param_.getValue("max_intensity");
392  auto_max_stdev_Factor_ = (double)param_.getValue("auto_max_stdev_factor");
393  auto_max_percentile_ = param_.getValue("auto_max_percentile");
394  auto_mode_ = param_.getValue("auto_mode");
395  win_len_ = (double)param_.getValue("win_len");
396  bin_count_ = param_.getValue("bin_count");
397  min_required_elements_ = param_.getValue("min_required_elements");
398  noise_for_empty_window_ = (double)param_.getValue("noise_for_empty_window");
399  is_result_valid_ = false;
400  }
401 
411  double win_len_;
419 
420 
421 
422  };
423 
424 } // namespace OpenMS
425 
426 #endif //OPENMS_FILTERING_NOISEESTIMATION_DSIGNALTONOISEESTIMATORMEDIAN_H
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!...
Definition: DefaultParamHandler.h:157
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
SignalToNoiseEstimator< Container >::PeakType PeakType
Definition: SignalToNoiseEstimatorMedian.h:89
A more convenient string class.
Definition: String.h:57
double auto_max_percentile_
parameter for initial automatic estimation of &quot;max_intensity_&quot; percentile or a stdev ...
Definition: SignalToNoiseEstimatorMedian.h:407
Definition: SignalToNoiseEstimatorMedian.h:79
int auto_mode_
determines which method shall be used for estimating &quot;max_intensity_&quot;. valid are MANUAL=-1, AUTOMAXBYSTDEV=0 or AUTOMAXBYPERCENT=1
Definition: SignalToNoiseEstimatorMedian.h:409
Param param_
Container for current parameters.
Definition: DefaultParamHandler.h:150
void computeSTN_(const PeakIterator &scan_first_, const PeakIterator &scan_last_)
Definition: SignalToNoiseEstimatorMedian.h:170
int bin_count_
number of bins in the histogram
Definition: SignalToNoiseEstimatorMedian.h:413
SignalToNoiseEstimator & operator=(const SignalToNoiseEstimator &source)
Assignment operator.
Definition: SignalToNoiseEstimator.h:92
double win_len_
range of data points which belong to a window in Thomson
Definition: SignalToNoiseEstimatorMedian.h:411
Container::const_iterator PeakIterator
Definition: SignalToNoiseEstimator.h:65
double max_intensity_
maximal intensity considered during binning (values above get discarded)
Definition: SignalToNoiseEstimatorMedian.h:403
SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate
Definition: SignalToNoiseEstimatorMedian.h:91
GaussianEstimate estimate_(const PeakIterator &scan_first_, const PeakIterator &scan_last_) const
calculate mean &amp; stdev of intensities of a spectrum
Definition: SignalToNoiseEstimator.h:175
SignalToNoiseEstimatorMedian(const SignalToNoiseEstimatorMedian &source)
Copy Constructor.
Definition: SignalToNoiseEstimatorMedian.h:135
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:451
bool is_result_valid_
flag: set to true if SignalToNoise estimates are calculated and none of the params were changed...
Definition: SignalToNoiseEstimator.h:215
void setMaxInt(const String &key, Int max)
Sets the maximum value for the integer or integer list parameter key.
void updateMembers_()
overridden function from DefaultParamHandler to keep members up to date, when a parameter is changed ...
Definition: SignalToNoiseEstimatorMedian.h:389
protected struct to store parameters my, sigma for a Gaussian distribution
Definition: SignalToNoiseEstimator.h:167
void endProgress() const
Ends the progress display.
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
double auto_max_stdev_Factor_
parameter for initial automatic estimation of &quot;max_intensity_&quot;: a stdev multiplier ...
Definition: SignalToNoiseEstimatorMedian.h:405
void setMaxFloat(const String &key, double max)
Sets the maximum value for the floating point or floating point list parameter key.
IntensityThresholdCalculation
method to use for estimating the maximal intensity that is used for histogram calculation ...
Definition: SignalToNoiseEstimatorMedian.h:79
PeakIterator::value_type PeakType
Definition: SignalToNoiseEstimator.h:66
double noise_for_empty_window_
Definition: SignalToNoiseEstimatorMedian.h:418
Definition: SignalToNoiseEstimatorMedian.h:79
Invalid value exception.
Definition: Exception.h:336
SignalToNoiseEstimatorMedian & operator=(const SignalToNoiseEstimatorMedian &source)
Definition: SignalToNoiseEstimatorMedian.h:145
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition: SignalToNoiseEstimatorMedian.h:72
SignalToNoiseEstimatorMedian()
default constructor
Definition: SignalToNoiseEstimatorMedian.h:94
int min_required_elements_
minimal number of elements a window needs to cover to be used
Definition: SignalToNoiseEstimatorMedian.h:415
void setMinInt(const String &key, Int min)
Sets the minimum value for the integer or integer list parameter key.
This class represents the abstract base class of a signal to noise estimator.
Definition: SignalToNoiseEstimator.h:57
std::map< PeakType, double, typename PeakType::PositionLess > stn_estimates_
stores the noise estimate for each peak
Definition: SignalToNoiseEstimator.h:208
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void setProgress(SignedSize value) const
Sets the current progress.
void setName(const String &name)
Mutable access to the name.
virtual ~SignalToNoiseEstimatorMedian()
Destructor.
Definition: SignalToNoiseEstimatorMedian.h:158
Definition: SignalToNoiseEstimatorMedian.h:79
SignalToNoiseEstimator< Container >::PeakIterator PeakIterator
Definition: SignalToNoiseEstimatorMedian.h:88
void setMinFloat(const String &key, double min)
Sets the minimum value for the floating point or floating point list parameter key.
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor.

OpenMS / TOPP release 2.0.0 Documentation generated on Wed Mar 30 2016 16:18:41 using doxygen 1.8.5