35 #ifndef OPENMS_FILTERING_TRANSFORMERS_SPECTRAMERGER_H
36 #define OPENMS_FILTERING_TRANSFORMERS_SPECTRAMERGER_H
81 defaults_.setValue(
"rt_tolerance", 10.0,
"Maximal RT distance (in [s]) for two spectra's precursors.");
82 defaults_.setValue(
"mz_tolerance", 1.0,
"Maximal m/z distance (in Da) for two spectra's precursors.");
88 rt_max_ = (
double) param_.getValue(
"rt_tolerance");
89 mz_max_ = (
double) param_.getValue(
"mz_tolerance");
97 return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
104 double d_rt = fabs(first.
getRT() - second.
getRT());
105 double d_mz = fabs(first.
getMZ() - second.
getMZ());
107 if (d_rt > rt_max_ || d_mz > mz_max_) {
return 0; }
110 double sim = getSimilarity(d_rt, d_mz);
147 template <
typename MapType>
150 IntList ms_levels = param_.getValue(
"block_method:ms_levels");
151 Int rt_block_size(param_.getValue(
"block_method:rt_block_size"));
152 double rt_max_length = (param_.getValue(
"block_method:rt_max_length"));
154 if (rt_max_length == 0)
156 rt_max_length = (std::numeric_limits<double>::max)();
159 for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
163 SignedSize block_size_count(rt_block_size + 1);
164 Size idx_spectrum(0);
167 if (
Int(it1->getMSLevel()) == *it_mslevel)
170 if (++block_size_count >= rt_block_size ||
171 exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
173 block_size_count = 0;
174 idx_block = idx_spectrum;
178 spectra_to_merge[idx_block].push_back(idx_spectrum);
185 if (block_size_count == 0)
187 spectra_to_merge[idx_block] = std::vector<Size>();
191 mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
200 template <
typename MapType>
206 std::vector<BinaryTreeNode> tree;
210 std::vector<BaseFeature> data;
212 for (
Size i = 0; i < exp.
size(); ++i)
214 if (exp[i].getMSLevel() != 2)
continue;
217 index_mapping[data.size()] = i;
221 bf.
setRT(exp[i].getRT());
222 std::vector<Precursor> pcs = exp[i].getPrecursors();
224 if (pcs.size() > 1)
LOG_WARN <<
"More than one precursor found. Using first one!" << std::endl;
225 bf.
setMZ(pcs[0].getMZ());
228 data_size = data.size();
243 std::vector<std::vector<Size> > clusters;
246 for (
Size ii = 0; ii < tree.size(); ++ii)
248 if (tree[ii].distance >= 1) tree[ii].distance = -1;
249 if (tree[ii].distance != -1) ++node_count;
251 ca.
cut(data_size - node_count, tree, clusters);
259 for (
Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
261 if (clusters[i_outer].size() <= 1)
continue;
263 Size cl_index0 = clusters[i_outer][0];
264 spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
266 for (
Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
268 Size cl_index = clusters[i_outer][i_inner];
269 spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[cl_index]);
274 mergeSpectra_(exp, spectra_to_merge, 2);
295 template <
typename MapType>
298 double mz_binning_width(param_.getValue(
"mz_binning_width"));
299 String mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
305 std::set<Size> merged_indices;
310 p.
setValue(
"tolerance", mz_binning_width);
313 p.
setValue(
"is_relative_tolerance", mz_binning_unit ==
"Da" ?
"false" :
"true");
315 std::vector<std::pair<Size, Size> > alignment;
317 Size count_peaks_aligned(0);
318 Size count_peaks_overall(0);
321 for (
Map<
Size, std::vector<Size> >::ConstIterator it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
324 ++cluster_sizes[it->second.size() + 1];
330 merged_indices.insert(it->first);
333 double rt_average = consensus_spec.
getRT();
334 double precursor_mz_average = 0.0;
335 Size precursor_count(0);
338 precursor_mz_average = consensus_spec.
getPrecursors()[0].getMZ();
342 count_peaks_overall += consensus_spec.size();
345 for (std::vector<Size>::const_iterator sit = it->second.begin(); sit != it->second.end(); ++sit)
347 consensus_spec.
unify(exp[*sit]);
348 merged_indices.insert(*sit);
350 rt_average += exp[*sit].getRT();
351 if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
353 precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
360 count_peaks_aligned += alignment.size();
361 count_peaks_overall += exp[*sit].
size();
364 Size spec_b_index(0);
367 Size spec_a = consensus_spec.size(), spec_b = exp[*sit].
size(), align_size = alignment.size();
371 if (alignment.size() > 0 && alignment[align_index].second == spec_b_index)
373 consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
374 pit->getIntensity());
376 if (align_index == alignment.size()) alignment.clear();
380 consensus_spec.push_back(*pit);
385 if (spec_a + spec_b - align_size != consensus_spec.size()) std::cerr <<
"\n\n ERRROR \n\n";
387 rt_average /= it->second.size() + 1;
388 consensus_spec.
setRT(rt_average);
392 if (precursor_count) precursor_mz_average /= precursor_count;
396 pcs[0].setMZ(precursor_mz_average);
400 if (consensus_spec.empty())
continue;
401 else merged_spectra.addSpectrum(consensus_spec);
407 LOG_INFO <<
" size " << it->first <<
": " << it->second <<
"x\n";
411 sprintf(buffer,
"%d/%d (%.2f %%) of blocked spectra", (
int)count_peaks_aligned,
412 (
int)count_peaks_overall,
float(count_peaks_aligned) /
float(count_peaks_overall) * 100.);
418 for (
Size i = 0; i < exp.
size(); ++i)
420 if (merged_indices.count(i) == 0)
435 exp.
getSpectra().insert(exp.
end(), merged_spectra.begin(), merged_spectra.end());
442 #endif //OPENMS_FILTERING_TRANSFORMERS_SPECTRAMERGER_H
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
A more convenient string class.
Definition: String.h:57
Size size() const
Definition: MSExperiment.h:117
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:197
#define LOG_INFO
Macro to use if a piece of information, e.g. a status, should be reported.
Definition: LogStream.h:455
void sortByPosition()
Lexicographically sorts the peaks by their position.
Definition: MSSpectrum.h:419
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition: ClusterAnalyzer.h:52
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:296
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:68
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:148
Iterator begin()
Definition: MSExperiment.h:147
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:59
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:61
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:128
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Definition: MSExperiment.h:600
Base::const_iterator const_iterator
Definition: MSExperiment.h:115
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:203
void setParameters(const Param &param)
Sets the parameters.
SpectraDistance_()
Definition: SpectraMerger.h:78
A basic LC-MS feature.
Definition: BaseFeature.h:56
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:451
Iterator end()
Definition: MSExperiment.h:157
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:209
double rt_max_
Definition: SpectraMerger.h:116
double getRT() const
Definition: MSSpectrum.h:243
void setMSLevel(UInt ms_level)
Sets the MS level.
Definition: MSSpectrum.h:265
Definition: SpectraMerger.h:74
const std::vector< MSSpectrum< PeakT > > & getSpectra() const
returns the spectra list
Definition: MSExperiment.h:764
Aligns the peaks of two spectra.
Definition: SpectrumAlignment.h:62
void addSpectrum(const MSSpectrum< PeakT > &spectrum)
adds a spectrum to the list
Definition: MSExperiment.h:758
void setRT(double rt)
Sets the absolute retention time (in seconds)
Definition: MSSpectrum.h:249
Management and storage of parameters / INI files.
Definition: Param.h:75
Map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:124
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:191
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:58
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
Illegal self operation exception.
Definition: Exception.h:379
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:103
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:101
double mz_max_
Definition: SpectraMerger.h:117
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:94
void clear(bool clear_meta_data)
Clears all data and meta data.
Definition: MSExperiment.h:850
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Method to calculate a partition resulting from a certain step in clustering given by the number of cl...
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:201
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:64
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType &s1, const SpectrumType &s2) const
Definition: SpectrumAlignment.h:83
int Int
Signed integer type.
Definition: Types.h:96
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:51
void updateMembers_()
This method is used to update extra member variables at the end of the setParameters() method...
Definition: SpectraMerger.h:86
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:108