class NpSearch::ScoreSequence

A class that scores sequences.

Constants

DI_CLV
MONO_NP_CLV_2
MONO_NP_CLV_4
MONO_NP_CLV_6
NP_CLV

Public Class Methods

run(sequence, opt) click to toggle source
# File lib/npsearch/scoresequence.rb, line 13
# Runs every scoring step against +sequence+, in a fixed order.
#
# sequence - the sequence object that is split and scored in place.
# opt      - options Hash; only opt[:temp_dir] is read here and it is
#            forwarded to the similarity step.
#
# Returns whatever the final step (acidic_spacers) returns.
def run(sequence, opt)
  # The candidate peptides must be extracted first; the scoring steps
  # below all read sequence.potential_cleaved_nps.
  split_into_potential_neuropeptides(sequence)

  count_np_cleavage_sites(sequence)
  count_c_terminal_glycines(sequence)

  temp_dir = opt[:temp_dir]
  np_similarity(sequence, temp_dir)

  acidic_spacers(sequence)
end

Private Class Methods

acidic_spacers(sequence) click to toggle source

Adds 0.10 to the score for each acidic spacer that is detected. An acidic spacer is defined as a potential cleaved peptide that is no longer than 25% of the precursor length (not including the signal peptide) and in which more than 50% of the amino acids are D or E.

# File lib/npsearch/scoresequence.rb, line 72
# Adds 0.10 to the sequence score for every potential cleaved peptide that
# looks like an acidic spacer: its length is at most 25% of the length of
# sequence.seq and more than 50% of its residues are D or E.
#
# sequence - object responding to #seq, #score, #score= and
#            #potential_cleaved_nps (each entry a Hash with an :np String).
#
# Returns the collection that was iterated (the result of #each).
def acidic_spacers(sequence)
  precursor_length = sequence.seq.length
  sequence.potential_cleaved_nps.each do |e|
    np = e[:np]
    # Integer#/ truncates, so the ratios used to collapse to 0 or 1 and the
    # thresholds never worked as intended; fdiv yields a true Float ratio
    # (and never raises on a zero denominator).
    next if np.length.fdiv(precursor_length) > 0.25
    sequence.score += 0.10 if np.count('DE').fdiv(np.length) > 0.5
  end
end
count_c_terminal_glycines(sequence) click to toggle source

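Scores C-terminal glycines: adds 0.40 for a peptide ending in FG whose trailing dibasic cleavage site (di_clv_end) is KR, 0.25 for a peptide ending in G whose trailing dibasic cleavage site is KR, and 0.10 for any other peptide ending in G, GK or GR.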

# File lib/npsearch/scoresequence.rb, line 56
# Adds a bonus to the sequence score for each potential cleaved peptide
# that ends in a glycine:
#
#   0.40 - peptide ends in FG and its :di_clv_end is 'KR'
#   0.25 - peptide ends in G  and its :di_clv_end is 'KR'
#   0.10 - peptide ends in G, GK or GR (any :di_clv_end)
#
# Only the first matching rule is applied to a given peptide.
#
# Returns nil when there are no peptides, otherwise the iterated collection.
def count_c_terminal_glycines(sequence)
  peptides = sequence.potential_cleaved_nps
  return if peptides.empty?
  peptides.each do |peptide|
    np = peptide[:np]
    kr_follows = peptide[:di_clv_end] == 'KR'
    bonus =
      if kr_follows && np =~ /FG$/
        0.40
      elsif kr_follows && np =~ /G$/
        0.25
      elsif np =~ /G$|GK$|GR$/
        0.10
      end
    sequence.score += bonus if bonus
  end
end
count_dibasic_np_clv(sequence, dibasic_clv) click to toggle source
# File lib/npsearch/scoresequence.rb, line 41
# Adds the bonus associated with a dibasic cleavage site to the score:
# 0.09 for 'KR', 0.05 for 'RR' or 'KK'. Any other value (including nil)
# leaves the score untouched.
#
# Returns the updated score, or nil when no bonus applies.
def count_dibasic_np_clv(sequence, dibasic_clv)
  bonus = { 'KR' => 0.09, 'RR' => 0.05, 'KK' => 0.05 }[dibasic_clv]
  sequence.score += bonus if bonus
end
count_mono_basic_np_clv(sequence, mono_2_clv, mono_4_clv, mono_6_clv) click to toggle source
# File lib/npsearch/scoresequence.rb, line 50
# Adds 0.02 to the score when at least one of the three monobasic
# cleavage values is non-nil.
#
# Returns the updated score, or nil when all three values are nil.
def count_mono_basic_np_clv(sequence, mono_2_clv, mono_4_clv, mono_6_clv)
  present = [mono_2_clv, mono_4_clv, mono_6_clv].compact
  return if present.empty?
  sequence.score += 0.02
end
count_np_cleavage_sites(sequence) click to toggle source
# File lib/npsearch/scoresequence.rb, line 32
# Scores the cleavage site that trails each potential cleaved peptide by
# delegating to count_dibasic_np_clv (with :di_clv_end) and then
# count_mono_basic_np_clv (with the three :mono_*_clv_end values).
#
# Returns nil when there are no peptides, otherwise the iterated collection.
def count_np_cleavage_sites(sequence)
  candidates = sequence.potential_cleaved_nps
  return if candidates.empty?
  candidates.each do |candidate|
    mono_ends = candidate.values_at(:mono_2_clv_end, :mono_4_clv_end,
                                    :mono_6_clv_end)
    count_dibasic_np_clv(sequence, candidate[:di_clv_end])
    count_mono_basic_np_clv(sequence, *mono_ends)
  end
end
np_similarity(sequence, temp_dir, results = nil) click to toggle source
# File lib/npsearch/scoresequence.rb, line 79
# Rewards sequences whose potential cleaved peptides cluster together.
#
# The cluster report is split on its ">Cluster N" header lines; every
# cluster containing more than one member line adds 0.15 per member to the
# sequence score.
#
# sequence - object responding to #score and #score=.
# temp_dir - directory handed to run_cdhit for its temporary files.
# results  - optional pre-computed cluster report (String). When nil, the
#            report is obtained from run_cdhit.
#
# Returns nil when no cluster report is available, otherwise the Array of
# cluster chunks that was iterated.
def np_similarity(sequence, temp_dir, results = nil)
  results = run_cdhit(sequence, temp_dir) if results.nil?
  # run_cdhit returns nil when there are no peptides to cluster; without
  # this guard the split below raised NoMethodError on nil.
  return if results.nil?
  results.split(/^>Cluster \d+\n/).each do |cluster|
    no_of_seqs_in_cluster = cluster.split("\n").length
    next unless no_of_seqs_in_cluster > 1
    sequence.score += (0.15 * no_of_seqs_in_cluster)
  end
end
run_cdhit(sequence, temp_dir) click to toggle source
# File lib/npsearch/scoresequence.rb, line 91
# Writes the potential cleaved peptides to a temporary FASTA file, runs
# cd-hit on it and returns the contents of the resulting ".clstr" report.
#
# sequence - object passed through to write_potential_peptides_to_tempfile.
# temp_dir - directory in which the temporary files are created.
#
# Returns the cluster report as a String, or nil when there were no
# peptides to write. All temporary files created here (input, output and
# the ".clstr" report) are removed before returning.
def run_cdhit(sequence, temp_dir)
  f = Tempfile.new('clust', temp_dir)
  return unless write_potential_peptides_to_tempfile(sequence, f)
  fo = Tempfile.new('clust_out', temp_dir)
  fo.close
  clstr = "#{fo.path}.clstr"
  # Argument-array form: no shell is involved, so paths containing spaces
  # or shell metacharacters are passed to cd-hit verbatim. As before,
  # cd-hit's stdout is captured and discarded.
  IO.popen(['cd-hit', '-c', '0.5', '-n', '3', '-l', '4',
            '-i', f.path, '-o', fo.path], &:read)
  File.read(clstr)
ensure
  # Remove the temp files on every exit path, including the early return.
  f.close! if f
  fo.close! if fo
  File.delete(clstr) if clstr && File.exist?(clstr)
end
split_into_potential_neuropeptides(sequence) click to toggle source
# File lib/npsearch/scoresequence.rb, line 23
# Scans sequence.seq for the stretches that lie between NP_CLV cleavage
# sites (or the start/end of a line) and stores them on the sequence.
#
# Each scan hit is turned into a Hash by pairing its captures, in order,
# with the nine keys below; keys without a corresponding capture map to nil.
#
# Returns the Array of Hashes assigned to sequence.potential_cleaved_nps.
def split_into_potential_neuropeptides(sequence)
  keys = [:di_clv_st, :mono_2_clv_st, :mono_4_clv_st, :mono_6_clv_st, :np,
          :di_clv_end, :mono_2_clv_end, :mono_4_clv_end, :mono_6_clv_end]
  hits = sequence.seq.scan(/(?<=^|#{NP_CLV})(\w+?)(?=#{NP_CLV}|$)/i)
  sequence.potential_cleaved_nps = hits.map { |hit| Hash[keys.zip(hit)] }
end
write_potential_peptides_to_tempfile(sequence, tempfile) click to toggle source
# File lib/npsearch/scoresequence.rb, line 99
# Writes the potential cleaved peptides to +tempfile+ as FASTA records
# named seq0, seq1, ... (using each entry's :np value) and closes the file.
#
# Returns false, leaving +tempfile+ untouched, when there are no peptides;
# true after the records have been written.
def write_potential_peptides_to_tempfile(sequence, tempfile)
  peptides = sequence.potential_cleaved_nps
  return false if peptides.empty?
  records = peptides.each_with_index.map do |peptide, idx|
    ">seq#{idx}\n#{peptide[:np]}\n"
  end
  tempfile.write(records.join)
  tempfile.close
  true
end