Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
Public Member Functions | Static Public Member Functions | List of all members
IDFilter Class Reference

Used to filter identifications by different criteria. More...

#include <OpenMS/FILTERING/ID/IDFilter.h>

Public Member Functions

 IDFilter ()
 Constructor. More...
 
virtual ~IDFilter ()
 Destructor. More...
 

Static Public Member Functions

template<class IdentificationType >
static bool getBestHit (const std::vector< IdentificationType > identifications, bool assume_sorted, PeptideHit &best_hit)
 
template<class IdentificationType >
static void filterIdentificationsByThreshold (const IdentificationType &identification, double threshold_fraction, IdentificationType &filtered_identification)
 filters a ProteinIdentification or PeptideIdentification by only allowing peptides/proteins which reach a score above threshold_fraction * SignificanceThreshold More...
 
template<class IdentificationType >
static void filterIdentificationsByScore (const IdentificationType &identification, double threshold_score, IdentificationType &filtered_identification)
 filters a ProteinIdentification or PeptideIdentification corresponding to the threshold_score More...
 
template<class IdentificationType >
static void filterIdentificationsByBestNHits (const IdentificationType &identification, Size n, IdentificationType &filtered_identification)
 filters a ProteinIdentification or PeptideIdentification corresponding to the score. More...
 
template<class IdentificationType >
static void filterIdentificationsByBestNToMHits (const IdentificationType &identification, Size n, Size m, IdentificationType &filtered_identification)
 filters a ProteinIdentification or PeptideIdentification corresponding to the score. More...
 
template<class IdentificationType >
static void filterIdentificationsByDecoy (const IdentificationType &identification, IdentificationType &filtered_identification)
 filters a ProteinIdentification or PeptideIdentification corresponding to their decoy information. More...
 
static void filterIdentificationsByBestHits (const PeptideIdentification &identification, PeptideIdentification &filtered_identification, bool strict=false)
 filters a PeptideIdentification keeping only the best scoring hits (if strict is set, the best hit is kept only if it is the only hit with that score) More...
 
static bool filterIdentificationsByMetaValueRange (const PeptideIdentification &identification, const String &key, double low, double high, bool missing=false)
 Checks whether a meta value of the peptide identification is within a given range. More...
 
static void filterIdentificationsByProteins (const PeptideIdentification &identification, const std::vector< FASTAFile::FASTAEntry > &proteins, PeptideIdentification &filtered_identification, bool no_protein_identifiers=false)
 
static void filterIdentificationsByProteins (const ProteinIdentification &identification, const std::vector< FASTAFile::FASTAEntry > &proteins, ProteinIdentification &filtered_identification)
 
static void filterIdentificationsByExclusionPeptides (const PeptideIdentification &identification, const std::set< String > &peptides, bool ignore_modifications, PeptideIdentification &filtered_identification)
 removes all peptide hits having a sequence equal to a String in peptides. If ignore_modifications is set, the unmodified versions are generated and compared to the set of Strings. More...
 
static void filterIdentificationsByLength (const PeptideIdentification &identification, PeptideIdentification &filtered_identification, Size min_length, Size max_length=UINT_MAX)
 
static void filterIdentificationsByCharge (const PeptideIdentification &identification, Int charge, PeptideIdentification &filtered_identification)
 only peptides that have a charge equal to or greater than charge will be kept More...
 
static void filterIdentificationsByVariableModifications (const PeptideIdentification &identification, const std::vector< String > &fixed_modifications, PeptideIdentification &filtered_identification)
 only peptides having a variable modification will be kept More...
 
static void removeUnreferencedProteinHits (const ProteinIdentification &identification, const std::vector< PeptideIdentification > &peptide_identifications, ProteinIdentification &filtered_identification)
 only protein hits in identification which are referenced by a peptide in peptide_identifications are kept More...
 
static void removeUnreferencedPeptideHits (const ProteinIdentification &identification, std::vector< PeptideIdentification > &peptide_identifications, bool delete_unreferenced_peptide_hits=false)
 only peptide hits in peptide_identifications which are referenced by a protein in identification are kept More...
 
static void filterIdentificationsUnique (const PeptideIdentification &identification, PeptideIdentification &filtered_identification)
 if a peptide hit occurs more than once per PSM, only one instance is kept More...
 
static void filterIdentificationsByMzError (const PeptideIdentification &identification, double mass_error, bool unit_ppm, PeptideIdentification &filtered_identification)
 filter identifications by deviation to the theoretical mass More...
 
static void filterIdentificationsByRT (const std::vector< PeptideIdentification > &identifications, double min_rt, double max_rt, std::vector< PeptideIdentification > &filtered_identifications)
 
static void filterIdentificationsByMZ (const std::vector< PeptideIdentification > &identifications, double min_mz, double max_mz, std::vector< PeptideIdentification > &filtered_identifications)
 
static void filterIdentificationsByRTPValues (const PeptideIdentification &identification, PeptideIdentification &filtered_identification, double p_value=0.05)
 Filters the peptide hits according to their predicted RT p-values. More...
 
static void filterIdentificationsByRTFirstDimPValues (const PeptideIdentification &identification, PeptideIdentification &filtered_identification, double p_value=0.05)
 Filters the peptide hits according to their predicted RT p-values of the first dimension. More...
 
template<class PeakT >
static void filterIdentificationsByThresholds (MSExperiment< PeakT > &experiment, double peptide_threshold_fraction, double protein_threshold_fraction)
 filters an MS/MS experiment corresponding to the threshold fractions More...
 
template<class PeakT >
static void filterIdentificationsByScores (MSExperiment< PeakT > &experiment, double peptide_threshold_score, double protein_threshold_score)
 filters an MS/MS experiment corresponding to the threshold scores More...
 
template<class PeakT >
static void filterIdentificationsByBestNHits (MSExperiment< PeakT > &experiment, Size n)
 filters an MS/MS experiment corresponding to the best n hits for every spectrum More...
 
template<class PeakT >
static void filterIdentificationsByProteins (MSExperiment< PeakT > &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
 filters an MS/MS experiment corresponding to the given proteins More...
 
static bool updateProteinGroups (const std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits, std::vector< ProteinIdentification::ProteinGroup > &filtered_groups)
 Update protein groups after protein hits were filtered. More...
 

Detailed Description

Used to filter identifications by different criteria.

The identifications are filtered by significance thresholds and by sequences. The filtering by significance thresholds looks for the best ProteinIdentification that fulfills the significance threshold criterion: score > significance_threshold * significance_fraction. The filtering by sequences looks for the best ProteinIdentification that is contained in one of the protein sequences.

TODO: fix design of filter functions. There will be an error, e.g., if input and output point to the same PeptideIdentification.

Constructor & Destructor Documentation

IDFilter ( )

Constructor.

virtual ~IDFilter ( )
virtual

Destructor.

Member Function Documentation

static void filterIdentificationsByBestHits ( const PeptideIdentification &  identification,
PeptideIdentification &  filtered_identification,
bool  strict = false 
)
static

filters a PeptideIdentification keeping only the best scoring hits (if strict is set, the best hit is kept only if it is the only hit with that score)

static void filterIdentificationsByBestNHits ( const IdentificationType &  identification,
Size  n,
IdentificationType &  filtered_identification 
)
inlinestatic

filters a ProteinIdentification or PeptideIdentification corresponding to the score.

If the method higherScoreBetter() returns true for the IdentificationType the n highest scoring hits are kept. Otherwise the n lowest scoring hits are kept.

Referenced by SimpleSearchEngine::postProcessHits_().

static void filterIdentificationsByBestNHits ( MSExperiment< PeakT > &  experiment,
Size  n 
)
inlinestatic

filters an MS/MS experiment corresponding to the best n hits for every spectrum

static void filterIdentificationsByBestNToMHits ( const IdentificationType &  identification,
Size  n,
Size  m,
IdentificationType &  filtered_identification 
)
inlinestatic

filters a ProteinIdentification or PeptideIdentification corresponding to the score.

If the method higherScoreBetter() returns true for the IdentificationType the n to m highest scoring hits are kept. Otherwise the n to m lowest scoring hits are kept. This method is useful if a range of higher hits is used for decoy fairness analysis.

static void filterIdentificationsByCharge ( const PeptideIdentification &  identification,
Int  charge,
PeptideIdentification &  filtered_identification 
)
static

only peptides that have a charge equal to or greater than charge will be kept

static void filterIdentificationsByDecoy ( const IdentificationType &  identification,
IdentificationType &  filtered_identification 
)
inlinestatic

filters a ProteinIdentification or PeptideIdentification corresponding to their decoy information.

Checks for "target_decoy" or "isDecoy" metadata and removes a Protein/Peptide if the values are "decoy" or "true" respectively.

static void filterIdentificationsByExclusionPeptides ( const PeptideIdentification &  identification,
const std::set< String > &  peptides,
bool  ignore_modifications,
PeptideIdentification &  filtered_identification 
)
static

removes all peptide hits having a sequence equal to a String in peptides. If ignore_modifications is set, the unmodified versions are generated and compared to the set of Strings.

static void filterIdentificationsByLength ( const PeptideIdentification &  identification,
PeptideIdentification &  filtered_identification,
Size  min_length,
Size  max_length = UINT_MAX 
)
static

Only peptides having a length l with min_length <= l <= max_length will be kept. max_length will be ignored if it is smaller than min_length.

static bool filterIdentificationsByMetaValueRange ( const PeptideIdentification &  identification,
const String &  key,
double  low,
double  high,
bool  missing = false 
)
static

Checks whether a meta value of the peptide identification is within a given range.

Useful for filtering by precursor RT or m/z.

Parameters
identification: The peptide ID to check
key: Key (name) for the meta value
low: Lower boundary (inclusive)
high: Upper boundary (inclusive)
missing: What to return when the meta value is missing
Returns
Whether the peptide ID passes the check

static void filterIdentificationsByMZ ( const std::vector< PeptideIdentification > &  identifications,
double  min_mz,
double  max_mz,
std::vector< PeptideIdentification > &  filtered_identifications 
)
static

Only peptides that are in a certain precursor MZ range will be kept. Peptides with no MZ value will be removed in any case.

static void filterIdentificationsByMzError ( const PeptideIdentification &  identification,
double  mass_error,
bool  unit_ppm,
PeptideIdentification &  filtered_identification 
)
static

filter identifications by deviation to the theoretical mass

static void filterIdentificationsByProteins ( const PeptideIdentification &  identification,
const std::vector< FASTAFile::FASTAEntry > &  proteins,
PeptideIdentification &  filtered_identification,
bool  no_protein_identifiers = false 
)
static

filters a PeptideIdentification corresponding to the given proteins. PeptideHits with no matching proteins are removed. Matching is done either based on accessions or on sequence (if no accessions are given, or no_protein_identifiers is set).

static void filterIdentificationsByProteins ( const ProteinIdentification &  identification,
const std::vector< FASTAFile::FASTAEntry > &  proteins,
ProteinIdentification &  filtered_identification 
)
static

filters a ProteinIdentification corresponding to the given proteins. ProteinHits with no matching proteins are removed. Matching is done based on accessions only.

static void filterIdentificationsByProteins ( MSExperiment< PeakT > &  experiment,
const std::vector< FASTAFile::FASTAEntry > &  proteins 
)
inlinestatic

filters an MS/MS experiment corresponding to the given proteins

References PeptideIdentification::getHits(), and MSExperiment< PeakT, ChromatogramPeakT >::size().

static void filterIdentificationsByRT ( const std::vector< PeptideIdentification > &  identifications,
double  min_rt,
double  max_rt,
std::vector< PeptideIdentification > &  filtered_identifications 
)
static

Only peptides that are in a certain precursor RT range will be kept. Peptides with no RT value will be removed in any case.

static void filterIdentificationsByRTFirstDimPValues ( const PeptideIdentification &  identification,
PeptideIdentification &  filtered_identification,
double  p_value = 0.05 
)
static

Filters the peptide hits according to their predicted RT p-values of the first dimension.

Filters the peptide hits of this PeptideIdentification by the probability (p-value) of a correct PeptideIdentification having a deviation between observed and predicted RT equal to or bigger than allowed.

static void filterIdentificationsByRTPValues ( const PeptideIdentification &  identification,
PeptideIdentification &  filtered_identification,
double  p_value = 0.05 
)
static

Filters the peptide hits according to their predicted RT p-values.

Filters the peptide hits of this PeptideIdentification by the probability (p-value) of a correct PeptideIdentification having a deviation between observed and predicted RT equal to or bigger than allowed.

static void filterIdentificationsByScore ( const IdentificationType &  identification,
double  threshold_score,
IdentificationType &  filtered_identification 
)
inlinestatic

filters a ProteinIdentification or PeptideIdentification corresponding to the threshold_score

If the method higherScoreBetter() returns true for the IdentificationType all hits with a score smaller than threshold_score are removed. Otherwise all hits with a score bigger than threshold_score are removed.

static void filterIdentificationsByScores ( MSExperiment< PeakT > &  experiment,
double  peptide_threshold_score,
double  protein_threshold_score 
)
inlinestatic

filters an MS/MS experiment corresponding to the threshold scores

static void filterIdentificationsByThreshold ( const IdentificationType &  identification,
double  threshold_fraction,
IdentificationType &  filtered_identification 
)
inlinestatic

filters a ProteinIdentification or PeptideIdentification by only allowing peptides/proteins which reach a score above threshold_fraction * SignificanceThreshold

static void filterIdentificationsByThresholds ( MSExperiment< PeakT > &  experiment,
double  peptide_threshold_fraction,
double  protein_threshold_fraction 
)
inlinestatic

filters an MS/MS experiment corresponding to the threshold fractions

static void filterIdentificationsByVariableModifications ( const PeptideIdentification &  identification,
const std::vector< String > &  fixed_modifications,
PeptideIdentification &  filtered_identification 
)
static

only peptides having a variable modification will be kept

static void filterIdentificationsUnique ( const PeptideIdentification &  identification,
PeptideIdentification &  filtered_identification 
)
static

if a peptide hit occurs more than once per PSM, only one instance is kept

static bool getBestHit ( const std::vector< IdentificationType >  identifications,
bool  assume_sorted,
PeptideHit &  best_hit 
)
inlinestatic

gets the best scoring peptide hit from a vector of peptide identifications

Parameters
identifications: Vector of peptide IDs, each containing one or more peptide hits
assume_sorted: Are hits sorted by score (best score first) already? This allows for faster query, since only the first hit needs to be looked at
Returns
true if a hit was present, false otherwise

References OpenMS::Constants::h.

static void removeUnreferencedPeptideHits ( const ProteinIdentification &  identification,
std::vector< PeptideIdentification > &  peptide_identifications,
bool  delete_unreferenced_peptide_hits = false 
)
static

only peptide hits in peptide_identifications which are referenced by a protein in identification are kept

static void removeUnreferencedProteinHits ( const ProteinIdentification &  identification,
const std::vector< PeptideIdentification > &  peptide_identifications,
ProteinIdentification &  filtered_identification 
)
static

only protein hits in identification which are referenced by a peptide in peptide_identifications are kept

static bool updateProteinGroups ( const std::vector< ProteinIdentification::ProteinGroup > &  groups,
const std::vector< ProteinHit > &  hits,
std::vector< ProteinIdentification::ProteinGroup > &  filtered_groups 
)
static

Update protein groups after protein hits were filtered.

Parameters
groups: Input protein groups
hits: Available protein hits (all others are removed from the groups)
filtered_groups: Output protein groups
Returns
Returns whether the groups are still valid (which is the case if only whole groups, if any, were removed).

OpenMS / TOPP release 2.0.0 Documentation generated on Wed Mar 30 2016 16:18:46 using doxygen 1.8.5