class Mspire::Lipid::Search

Constants

STANDARD_MODIFICATIONS

Attributes

options[RW]
search_function[RW]

Public Class Methods

generate_simple_queries(lipids, mods=STANDARD_MODIFICATIONS, gain_and_loss=false) click to toggle source

will generate PossibleLipid objects and return a new search object uses only one kind of loss at a time and one type of gain at a time will also do the combination of a gain and a loss if gain_and_loss is true

# File lib/mspire/lipid/search.rb, line 33
def self.generate_simple_queries(lipids, mods=STANDARD_MODIFICATIONS, gain_and_loss=false)
  possible_lipids = []
  real_mods_and_cnts = mods.map {|name, cnts| [Mspire::Lipid::Modification.new(name), cnts] }
  # one of each
  real_mods_and_cnts.each do |mod, counts|
    counts.each do |cnt|
      possible_lipids << Mspire::Lipid::Search::Query.new(lipid, Array.new(cnt, mod))
    end
  end
  if gain_and_loss
    # one of each gain + one of each loss
    (gain_mod_cnt_pairs, loss_mod_cnt_pairs) = real_mods_and_cnts.partition {|mod, count| mod.gain }
    gain_mod_cnt_pairs.each do |mod, cnt|
      lipids.each do |lipid|
        #### need to implement still (use combinations or something...)
        get_this_working!
      end
    end
  end
  self.new(possible_lipids)
end
new(ions=[], opts={}) click to toggle source

ions are Mspire::Lipid::Ion objects each one should give a non-nil m/z value

# File lib/mspire/lipid/search.rb, line 57
def initialize(ions=[], opts={})
  @options = STANDARD_SEARCH.merge(opts)
  @db_isobar_spectrum = create_db_isobar_spectrum(ions)
  @search_function = create_search_function(ions, @options)
end

Public Instance Methods

create_db_isobar_spectrum(ions) click to toggle source

returns a DB isobar spectrum where the m/z values are all the m/z values to search for and the intensities each an array corresponding to all the lipid ions matching that m/z value

# File lib/mspire/lipid/search.rb, line 144
def create_db_isobar_spectrum(ions)
  mzs = [] ; query_groups = []
  pairs = ions.group_by(&:mz).sort_by(&:first)
  pairs.each {|mz, ar| mzs << mz ; query_groups << ar }
  Mspire::Spectrum.new([mzs, query_groups])
end
create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, num_rand_samples_per_bin, use_ppm=true) click to toggle source

use_ppm uses ppm or amu if false returns the search_bins

# File lib/mspire/lipid/search.rb, line 153
def create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, num_rand_samples_per_bin, use_ppm=true)
  search_bins.each do |search_bin| 
    rng = Random.new
    random_mzs = num_rand_samples_per_bin.times.map { rng.rand(search_bin.to_range)  }
    # find the deltas
    diffs = random_mzs.map do |random_mz| 
      nearest_random_mz = db_isobar_spectrum.find_nearest(random_mz)
      delta = (random_mz - nearest_random_mz).abs
      use_ppm ? delta./(nearest_random_mz).*(1e6) : delta
    end
    search_bin.probability_distribution = ProbabilityDistribution.deviations_to_probability_distribution((use_ppm ? :ppm : :amu), diffs)
  end
  search_bins
end
create_search_bins(db_isobar_spectrum, min_n_per_bin) click to toggle source
# File lib/mspire/lipid/search.rb, line 168
def create_search_bins(db_isobar_spectrum, min_n_per_bin)
  # make sure we get the right bin size based on the input
  ss = db_isobar_spectrum.mzs.size ; optimal_num_groups = 1
  (1..ss).each do |divisions|
    if  (ss.to_f / divisions) >= min_n_per_bin
      optimal_num_groups = divisions
    else ; break
    end
  end

  mz_ranges = []
  prev = nil

  groups = db_isobar_spectrum.points.in_groups(optimal_num_groups,false).to_a

  case groups.size
  when 0
    raise 'I think you need some data in your query spectrum!'
  when 1
    group = groups.first
    [ Mspire::Lipid::Search::Bin.new( Range.new(group.first.first, group.last.first), db_isobar_spectrum ) ]
  else
    search_bins = groups.each_cons(2).map do |points1, points2|
      bin = Mspire::Lipid::Search::Bin.new( Range.new(points1.first.first, points2.first.first, true), db_isobar_spectrum )
      prev = points2
      bin
    end
    _range = Range.new(prev.first.first, prev.last.first)
    search_bins << Mspire::Lipid::Search::Bin.new(_range, db_isobar_spectrum) # inclusive
  end
end
create_search_function(ions, opt) click to toggle source
# File lib/mspire/lipid/search.rb, line 120
def create_search_function(ions, opt)

  db_isobar_spectrum = create_db_isobar_spectrum(ions)

  search_bins = create_search_bins(db_isobar_spectrum, opt[:query_min_count_per_bin])

  create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, opt[:num_rand_samples_per_bin], opt[:ppm])

  # create the actual search function
  # returns an array of hit_groups
  lambda do |search_queries, num_nearest_hits|
    Bin.bin(search_bins, search_queries, &:mz)
    search_bins_with_data = search_bins.reject {|bin| bin.data.empty? }
    hit_groups = search_bins_with_data.map {|bin| bin.queries_to_hit_groups!(opt[:num_nearest]) }.flatten(1)
  end
end
qvalues!(hit_groups, opts) click to toggle source
# File lib/mspire/lipid/search.rb, line 81
def qvalues!(hit_groups, opts)

  # from http://stats.stackexchange.com/questions/870/multiple-hypothesis-testing-correction-with-benjamini-hochberg-p-values-or-q-va
  # but I've already coded this up before, too, in multiple ways...
  prev_bh_value = 0
  num_total_tests = hit_groups.size

  #hit_groups.each {|hg| p [hg.first.pvalue, hg] }

  # calculate Q-values BH style for now:
  # first hit is the best hit in the group
  pval_hg_index_tuples = hit_groups.each_with_index.map {|hg,i| [hg.pvalue, hg.delta.abs, hg.ppm.abs, i, hg] }

  if pval_hg_index_tuples.any? {|pair| pair.first.nan? }
    $stderr.puts "pvalue of NaN!"
    $stderr.puts ">>> Consider increasing query_min_count_per_bin or setting ppm to false <<<"
    raise
  end

  sorted_pval_index_tuples = pval_hg_index_tuples.sort

  sorted_pval_index_tuples.each_with_index do |tuple,i|
    pval = tuple.first
    bh_value = pval * num_total_tests / (i + 1)
    # Sometimes this correction can give values greater than 1,
    # so we set those values at 1
    bh_value = [bh_value, 1].min

    # To preserve monotonicity in the values, we take the
    # maximum of the previous value or this one, so that we
    # don't yield a value less than the previous.
    bh_value = [bh_value, prev_bh_value].max
    prev_bh_value = bh_value
    tuple.last.first.qvalue = bh_value # give the top hit the q-value
  end

  sorted_pval_index_tuples.map(&:last)
end