module TransmembraneIndex

A transmemIndex is a hash that takes a fasta reference as key and returns a structured hash containing the transmembrane information.

Public Class Methods

filetype(file) click to toggle source

returns :toppred or :phobius

# File lib/transmembrane.rb, line 7
def self.filetype(file)
  tp = nil
  File.open(file) do |fh|
    while (line = fh.gets)
      case line
      when /SEQENCE/
        tp = :phobius 
        break
      when /    0  0 i/
        tp = :phobius  # if they don't have the headers,
                       # this will pick it up if they have a
                       # single prot without tm or signal peptide.
        break
      when /Algorithm specific parameters/
        tp = :toppred  # New text
        break
      when /<parameters>/
        tp = :toppred  # XML
        break
      end
    end
  end
  tp
end
new(file) click to toggle source

right now accepts toppred.out files Phobius objects can use the fasta object to update their hash for methods like avg_overlap

# File lib/transmembrane.rb, line 39
def self.new(file)
  case x = filetype(file)
  when :toppred
    require 'transmembrane/toppred'
    TopPred::Index.new(file)
  when :phobius
    require 'transmembrane/phobius'
    # warn "WARNING: You have NO fasta object with Phobius based TransmembraneIndex! (which needs one to do proper indexing!)" unless fasta
    Phobius::Index.new(file)
  else 
    raise ArgumentError, "#{x} filetype for #{file} not recognized!"
  end
end

Public Instance Methods

avg_overlap(key, sequence, tp=:number) click to toggle source

tp = :number or :fraction which is the fraction of the sequence size returns the average number of overlapping amino acids with transmembrane segments returns nil if there is no protein by that key

# File lib/transmembrane.rb, line 66
def avg_overlap(key, sequence, tp=:number)
  if self.key? key
    numbers = num_transmem_aa(self[key], sequence)
    if numbers.size > 0
      sum = 0
      numbers.each {|num| sum += num}
      avg_num = sum.to_f / numbers.size
      # the one line way to do it
      #avg_num = numbers.inject(0) {|memo,num| num + memo }.to_f / numbers.size
      if tp == :fraction
        avg_num / sequence.size
        # this is the same as doing this:
        #numbers.inject(0.0) {|memo,num| (num.to_f/seq_size + memo) } / numbers.size
      else
        avg_num
      end
    else
      0.0
    end
  else  # what to do if the protein isn't there?? which happens on occasion
    nil
  end
end
num_certain_index() click to toggle source

returns a hash of key -> num certain transmembrane segments

# File lib/transmembrane.rb, line 54
def num_certain_index
  hash = {}
  self.each do |k,v|
    hash[k] = v[:num_certain_transmembrane_segments] || 0
  end
  hash
end
num_overlapping_chars(full_sequence, ranges, substring) click to toggle source

returns an array of the number of overlapping sequences in substring with the substrings defined in start_stop_doublets within full_sequence start_stop_doublets should be 0 indexed!!! the span includes the ‘stop’ position i.e., full_sequence

# File lib/transmembrane.rb, line 109
def num_overlapping_chars(full_sequence, ranges, substring)
  #start_positions = aaseq.enum_for(:scan, substring).map { $~.offset(0)[0]}
  if ranges.size == 0
    []
    #full_sequence.enum_for(:scan, substring).map { 0 }
  else
    substring_ranges = []
    pos = 0
    slen = substring.size
    while i=full_sequence.index(substring,pos)
      substring_ranges << Range.new(i, i+slen-1)
      pos = i + slen
    end
    # brute force way
    last_tm_range = ranges.last.last
    to_return = substring_ranges.map do |sb|
      overlap = 0
      # there's got to be a much simpler way to do this, but this does work...
      ranges.each do |tm|
        (frst, lst) = 
          if tm.include?( sb.first )
            [tm, sb]
          elsif tm.include?( sb.last )
            [sb, tm]
          else
            nil
          end
        if frst
          if lst.last <= frst.last
            overlap += (frst.last+1 - frst.first) - (lst.first - frst.first) - (frst.last - lst.last)
          else 
            overlap += (frst.last+1 - frst.first) - (lst.first - frst.first)
          end
        end
      end
      overlap
    end
  end
end
num_transmem_aa(tmhash, sequence) click to toggle source

returns an array (usually length of 1) of the number of amino acids contained inside transmembrane spanning segments. assumes that tmhash has the key ‘transmembrane_segments’ if there are no transmembrane segments, returns empty array.

# File lib/transmembrane.rb, line 94
def num_transmem_aa(tmhash, sequence)
  if tmhash.key? :transmembrane_segments
    ranges = tmhash[:transmembrane_segments].map do |tmseg|
      Range.new( tmseg[:start]-1, tmseg[:stop]-1 )
    end
    num_overlapping_chars(tmhash[:aaseq], ranges, sequence)
  else
    []
  end
end
reference_to_key(reference) click to toggle source
# File lib/transmembrane.rb, line 32
def reference_to_key(reference)
  # needs to be subclassed or written
end