35 #ifndef OPENMS_FILTERING_ID_IDFILTER_H
36 #define OPENMS_FILTERING_ID_IDFILTER_H
75 template <
class IdentificationType>
76 static bool getBestHit(
const std::vector<IdentificationType> identifications,
bool assume_sorted,
PeptideHit& best_hit)
78 if (identifications.size() == 0)
return false;
80 bool is_higher_score_better = identifications[0].isHigherScoreBetter();
81 double best_score = (is_higher_score_better ? -1 : 1) * std::numeric_limits<double>::max();
83 Size best_i_index(0), best_h_index(0);
86 for (
Size i = 0; i != identifications.size(); ++i)
88 if (identifications[i].getHits().size() == 0)
continue;
90 is_higher_score_better = identifications[i].isHigherScoreBetter();
91 max_h = (assume_sorted ? 1 : identifications[i].getHits().size());
92 for (
Size h = 0;
h < max_h; ++
h)
94 double score = identifications[i].getHits()[
h].getScore();
96 if (score > best_score * (is_higher_score_better ? 1 : -1))
105 if (max_h == -1)
return false;
107 best_hit = identifications[best_i_index].getHits()[best_h_index];
/// @brief Keeps only the hits whose score reaches a fraction of the
///        significance threshold.
///
/// A hit is kept when getScore() >= threshold_fraction *
/// identification.getSignificanceThreshold(). Everything else in
/// @p identification is copied to the output unchanged. Ranks are reassigned
/// only when at least one hit is kept.
///
/// @param identification Input identification (not modified unless it is the
///        same object as @p filtered_identification).
/// @param threshold_fraction Multiplier applied to the significance threshold.
/// @param filtered_identification Receives the copy with filtered hits. May
///        be the same object as @p identification.
template <class IdentificationType>
static void filterIdentificationsByThreshold(const IdentificationType& identification, double threshold_fraction, IdentificationType& filtered_identification)
{
  typedef typename IdentificationType::HitType HitType;

  // Collect the surviving hits before the output is touched, so the call is
  // also correct when input and output refer to the same object.
  const double min_score = threshold_fraction * identification.getSignificanceThreshold();
  const std::vector<HitType>& hits = identification.getHits();

  std::vector<HitType> filtered_hits;
  for (typename std::vector<HitType>::const_iterator it = hits.begin(); it != hits.end(); ++it)
  {
    if (it->getScore() >= min_score)
    {
      filtered_hits.push_back(*it);
    }
  }

  filtered_identification = identification;
  filtered_identification.setHits(filtered_hits);
  if (!filtered_hits.empty())
  {
    filtered_identification.assignRanks();
  }
}
/// @brief Keeps only the hits that are at least as good as @p threshold_score.
///
/// When identification.isHigherScoreBetter() is true a hit is kept if
/// getScore() >= threshold_score, otherwise if getScore() <= threshold_score.
/// Everything else in @p identification is copied to the output unchanged.
/// Ranks are reassigned only when at least one hit is kept.
///
/// @param identification Input identification (not modified unless it is the
///        same object as @p filtered_identification).
/// @param threshold_score Score a hit must reach (inclusive).
/// @param filtered_identification Receives the copy with filtered hits. May
///        be the same object as @p identification.
template <class IdentificationType>
static void filterIdentificationsByScore(const IdentificationType& identification, double threshold_score, IdentificationType& filtered_identification)
{
  typedef typename IdentificationType::HitType HitType;

  // The score orientation does not change while iterating; query it once.
  const bool higher_is_better = identification.isHigherScoreBetter();
  const std::vector<HitType>& hits = identification.getHits();

  // Collect the surviving hits before the output is touched, so the call is
  // also correct when input and output refer to the same object.
  std::vector<HitType> filtered_hits;
  for (typename std::vector<HitType>::const_iterator it = hits.begin(); it != hits.end(); ++it)
  {
    const double score = it->getScore();
    const bool passes = higher_is_better ? (score >= threshold_score) : (score <= threshold_score);
    if (passes)
    {
      filtered_hits.push_back(*it);
    }
  }

  filtered_identification = identification;
  filtered_identification.setHits(filtered_hits);
  if (!filtered_hits.empty())
  {
    filtered_identification.assignRanks();
  }
}
// Body fragment of the single-identification overload
//   filterIdentificationsByBestNHits(const IdentificationType& identification,
//                                    Size n,
//                                    IdentificationType& filtered_identification)
// NOTE(review): this listing is incomplete -- the signature line, the braces,
// the rest of the while condition and the iterator advance are not shown.
190 template <
class IdentificationType>
193 typedef typename IdentificationType::HitType HitType;
// NOTE(review): temp_hits is not used by any statement visible here.
194 std::vector<HitType> temp_hits;
195 std::vector<HitType> filtered_hits;
// Work on a sorted copy so the input stays untouched.
198 IdentificationType temp_identification = identification;
199 temp_identification.sort();
// Start the output as a copy of the input with an empty hit list.
201 filtered_identification = identification;
202 filtered_identification.setHits(std::vector<HitType>());
// Walk the sorted hits from the front and collect them.
205 typename std::vector<HitType>::const_iterator it = temp_identification.getHits().begin();
// NOTE(review): the second half of this condition is missing from the listing;
// presumably it stops after n hits -- confirm against the original header.
206 while (it != temp_identification.getHits().end()
209 filtered_hits.push_back(*it);
// Hits are stored and ranks reassigned only when something was collected.
214 if (!filtered_hits.empty())
216 filtered_identification.setHits(filtered_hits);
217 filtered_identification.assignRanks();
228 template <
class IdentificationType>
236 typedef typename IdentificationType::HitType HitType;
237 std::vector<HitType> filtered_hits;
239 IdentificationType temp_identification = identification;
240 temp_identification.sort();
242 filtered_identification = identification;
243 filtered_identification.setHits(std::vector<HitType>());
245 const std::vector<HitType>& hits = temp_identification.getHits();
246 for (
Size i = n - 1; n <= m - 1; ++i)
248 if (i >= hits.size())
252 filtered_hits.push_back(hits[i]);
255 if (!filtered_hits.empty())
257 filtered_identification.setHits(filtered_hits);
258 filtered_identification.assignRanks();
// Body fragment of
//   filterIdentificationsByDecoy(const IdentificationType& identification,
//                                IdentificationType& filtered_identification)
// NOTE(review): this listing is incomplete -- the signature line, the braces,
// the loop increment and the condition guarding the push_back are not shown.
269 template <
class IdentificationType>
272 typedef typename IdentificationType::HitType HitType;
// NOTE(review): temp_hits is not used by any statement visible here.
273 std::vector<HitType> temp_hits;
274 std::vector<HitType> filtered_hits;
// Start the output as a copy of the input with an empty hit list.
276 filtered_identification = identification;
277 filtered_identification.setHits(std::vector<HitType>());
// Visit every hit of the input.
279 for (
typename std::vector<HitType>::const_iterator it = identification.getHits().begin();
280 it != identification.getHits().end();
// A hit counts as decoy when meta value "isDecoy" equals "true" or meta value
// "target_decoy" equals "decoy"; each key is checked for existence first.
283 bool isDecoy = ((it->metaValueExists(
"isDecoy") && (
String)it->getMetaValue(
"isDecoy") ==
"true") ||
284 (it->metaValueExists(
"target_decoy") && (
String)it->getMetaValue(
"target_decoy") ==
"decoy"));
// NOTE(review): the test deciding whether this hit is kept is missing from the
// listing; presumably only non-decoy hits are kept -- confirm.
287 filtered_hits.push_back(*it);
// Hits are stored and ranks reassigned only when something was collected.
291 if (!filtered_hits.empty())
293 filtered_identification.setHits(filtered_hits);
294 filtered_identification.assignRanks();
314 static bool filterIdentificationsByMetaValueRange(
const PeptideIdentification& identification,
const String& key,
double low,
double high,
bool missing =
false);
319 static void filterIdentificationsByProteins(
const PeptideIdentification& identification,
const std::vector<FASTAFile::FASTAEntry>& proteins,
PeptideIdentification& filtered_identification,
bool no_protein_identifiers =
false);
327 static void filterIdentificationsByExclusionPeptides(
const PeptideIdentification& identification,
const std::set<String>& peptides,
bool ignore_modifications,
PeptideIdentification& filtered_identification);
343 static void removeUnreferencedPeptideHits(
const ProteinIdentification& identification, std::vector<PeptideIdentification>& peptide_identifications,
bool delete_unreferenced_peptide_hits =
false);
353 static void filterIdentificationsByRT(
const std::vector<PeptideIdentification>& identifications,
double min_rt,
double max_rt, std::vector<PeptideIdentification>& filtered_identifications);
357 static void filterIdentificationsByMZ(
const std::vector<PeptideIdentification>& identifications,
double min_mz,
double max_mz, std::vector<PeptideIdentification>& filtered_identifications);
// Body fragment of
//   filterIdentificationsByThresholds(MSExperiment<PeakT>& experiment,
//                                     double peptide_threshold_fraction,
//                                     double protein_threshold_fraction)
// NOTE(review): this listing is incomplete -- the signature line, the braces,
// the declarations of temp_protein_identification / temp_identification, the
// loop header over the protein identifications and whatever follows that loop
// are not shown.
378 template <
class PeakT>
// Protein part: collects the protein identifications that still have hits.
383 std::vector<ProteinIdentification> filtered_protein_identifications;
// Filter the j-th protein identification by the protein threshold fraction.
387 filterIdentificationsByThreshold(experiment.
getProteinIdentifications()[j], protein_threshold_fraction, temp_protein_identification);
// Identifications left without hits are dropped.
388 if (!temp_protein_identification.
getHits().empty())
390 filtered_protein_identifications.push_back(temp_protein_identification);
// Peptide part: buffer reused for every spectrum.
397 std::vector<PeptideIdentification> filtered_identifications;
// Outer loop: every spectrum of the experiment.
399 for (
Size i = 0; i < experiment.
size(); i++)
// Inner loop: every peptide identification attached to spectrum i.
401 for (
Size j = 0; j < experiment[i].getPeptideIdentifications().
size(); j++)
403 filterIdentificationsByThreshold(experiment[i].getPeptideIdentifications()[j], peptide_threshold_fraction, temp_identification);
// Identifications left without hits are dropped.
404 if (!temp_identification.
getHits().empty())
406 filtered_identifications.push_back(temp_identification);
// Replace the spectrum's identifications, then reset the buffer.
409 experiment[i].setPeptideIdentifications(filtered_identifications);
410 filtered_identifications.clear();
// Body fragment of
//   filterIdentificationsByScores(MSExperiment<PeakT>& experiment,
//                                 double peptide_threshold_score,
//                                 double protein_threshold_score)
// NOTE(review): this listing is incomplete -- the signature line, the braces,
// the declarations of temp_protein_identification / temp_identification, the
// loop header over the protein identifications and whatever follows that loop
// are not shown.
415 template <
class PeakT>
// Protein part: collects the protein identifications that still have hits.
420 std::vector<ProteinIdentification> filtered_protein_identifications;
// Filter the j-th protein identification by the protein threshold score.
424 filterIdentificationsByScore(experiment.
getProteinIdentifications()[j], protein_threshold_score, temp_protein_identification);
// Identifications left without hits are dropped.
425 if (!temp_protein_identification.
getHits().empty())
427 filtered_protein_identifications.push_back(temp_protein_identification);
// Peptide part: buffer reused for every spectrum.
434 std::vector<PeptideIdentification> filtered_identifications;
// Outer loop: every spectrum of the experiment.
436 for (
Size i = 0; i < experiment.
size(); i++)
// Inner loop: every peptide identification attached to spectrum i.
438 for (
Size j = 0; j < experiment[i].getPeptideIdentifications().
size(); j++)
440 filterIdentificationsByScore(experiment[i].getPeptideIdentifications()[j], peptide_threshold_score, temp_identification);
// Identifications left without hits are dropped.
441 if (!temp_identification.
getHits().empty())
443 filtered_identifications.push_back(temp_identification);
// Replace the spectrum's identifications, then reset the buffer.
446 experiment[i].setPeptideIdentifications(filtered_identifications);
447 filtered_identifications.clear();
// Body fragment of the experiment-level overload
//   filterIdentificationsByBestNHits(MSExperiment<PeakT>& experiment, Size n)
// NOTE(review): this listing is incomplete -- the signature line, the braces,
// the declarations of temp_protein_identification / temp_identification, the
// protein loop header and the call that fills temp_protein_identification are
// not shown.
452 template <
class PeakT>
// Protein part: collects the protein identifications that still have hits.
457 std::vector<ProteinIdentification> filtered_protein_identifications;
// Identifications left without hits are dropped.
462 if (!temp_protein_identification.
getHits().empty())
464 filtered_protein_identifications.push_back(temp_protein_identification);
// Peptide part: buffer reused for every spectrum.
471 std::vector<PeptideIdentification> filtered_identifications;
// Outer loop: every spectrum of the experiment.
473 for (
Size i = 0; i < experiment.
size(); i++)
// Inner loop: every peptide identification attached to spectrum i.
475 for (
Size j = 0; j < experiment[i].getPeptideIdentifications().
size(); j++)
// Delegates to the single-identification overload with the same n.
477 filterIdentificationsByBestNHits(experiment[i].getPeptideIdentifications()[j], n, temp_identification);
// Identifications left without hits are dropped.
478 if (!temp_identification.
getHits().empty())
480 filtered_identifications.push_back(temp_identification);
// Replace the spectrum's identifications, then reset the buffer.
483 experiment[i].setPeptideIdentifications(filtered_identifications);
484 filtered_identifications.clear();
// Body fragment of the experiment-level overload
//   filterIdentificationsByProteins(MSExperiment<PeakT>& experiment,
//                                   const std::vector<FASTAFile::FASTAEntry>& proteins)
// NOTE(review): this listing is incomplete -- the signature line, the braces
// and the declaration of temp_identification are not shown, so the exact
// nesting of the statements below cannot be verified from here.
489 template <
class PeakT>
492 std::vector<PeptideIdentification> temp_identifications;
493 std::vector<PeptideIdentification> filtered_identifications;
// Visit every spectrum of the experiment.
496 for (
Size i = 0; i < experiment.
size(); i++)
// Only spectra with MS level 2 are processed.
498 if (experiment[i].getMSLevel() == 2)
// Copy the spectrum's identifications before they are replaced.
500 temp_identifications = experiment[i].getPeptideIdentifications();
501 for (
Size j = 0; j < temp_identifications.size(); j++)
// Delegates to the PeptideIdentification overload; the trailing
// no_protein_identifiers argument is left at its default (false).
503 filterIdentificationsByProteins(temp_identifications[j], proteins, temp_identification);
// Identifications left without hits are dropped.
504 if (!temp_identification.
getHits().empty())
506 filtered_identifications.push_back(temp_identification);
// Replace the spectrum's identifications, then reset the buffer.
509 experiment[i].setPeptideIdentifications(filtered_identifications);
510 filtered_identifications.clear();
524 static bool updateProteinGroups(
525 const std::vector<ProteinIdentification::ProteinGroup>& groups,
526 const std::vector<ProteinHit>& hits,
527 std::vector<ProteinIdentification::ProteinGroup>& filtered_groups);
533 #endif // OPENMS_FILTERING_ID_IDFILTER_H
Representation of a protein identification run.
Definition: ProteinIdentification.h:61
void setProteinIdentifications(const std::vector< ProteinIdentification > &protein_identifications)
sets the protein ProteinIdentification vector
A more convenient string class.
Definition: String.h:57
Size size() const
Definition: MSExperiment.h:117
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
static void filterIdentificationsByProteins(MSExperiment< PeakT > &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
filters an MS/MS experiment corresponding to the given proteins
Definition: IDFilter.h:490
static void filterIdentificationsByScore(const IdentificationType &identification, double threshold_score, IdentificationType &filtered_identification)
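filters a ProteinIdentification or PeptideIdentification by threshold_score: a hit is kept if its score is at least threshold_score when higher scores are better, or at most threshold_score otherwise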
Definition: IDFilter.h:148
static void filterIdentificationsByThreshold(const IdentificationType &identification, double threshold_fraction, IdentificationType &filtered_identification)
filters a ProteinIdentification or PeptideIdentification by only allowing peptides/proteins which reach a score of at least threshold_fraction times the significance threshold
Definition: IDFilter.h:114
static void filterIdentificationsByScores(MSExperiment< PeakT > &experiment, double peptide_threshold_score, double protein_threshold_score)
filters an MS/MS experiment corresponding to the threshold scores
Definition: IDFilter.h:416
Representation of a peptide hit.
Definition: PeptideHit.h:55
static void filterIdentificationsByBestNHits(const IdentificationType &identification, Size n, IdentificationType &filtered_identification)
filters a ProteinIdentification or PeptideIdentification corresponding to the score.
Definition: IDFilter.h:191
static void filterIdentificationsByThresholds(MSExperiment< PeakT > &experiment, double peptide_threshold_fraction, double protein_threshold_fraction)
filters an MS/MS experiment corresponding to the threshold fractions
Definition: IDFilter.h:379
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
static bool getBestHit(const std::vector< IdentificationType > identifications, bool assume_sorted, PeptideHit &best_hit)
Definition: IDFilter.h:76
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:69
const std::vector< ProteinHit > & getHits() const
Returns the protein hits.
static void filterIdentificationsByBestNToMHits(const IdentificationType &identification, Size n, Size m, IdentificationType &filtered_identification)
filters a ProteinIdentification or PeptideIdentification corresponding to the score.
Definition: IDFilter.h:229
static void filterIdentificationsByBestNHits(MSExperiment< PeakT > &experiment, Size n)
filters an MS/MS experiment corresponding to the best n hits for every spectrum
Definition: IDFilter.h:453
Used to filter identifications by different criteria.
Definition: IDFilter.h:61
int Int
Signed integer type.
Definition: Types.h:96
static void filterIdentificationsByDecoy(const IdentificationType &identification, IdentificationType &filtered_identification)
filters a ProteinIdentification or PeptideIdentification corresponding to their decoy information...
Definition: IDFilter.h:270
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63