class TopPred::Parser_Text

Public Instance Methods

to_index(io, index={}) click to toggle source

Returns a hash structure in this form: {identifier => {aaseq => String, num_found => Int, num_certain_transmembrane_segments => Int, num_putative_transmembrane_segments => Int, best_structure_probability => Float, transmembrane_segments => [{probability => Float, start => Int, stop => Int, aaseq => String}] } }

# File lib/transmembrane/toppred.rb, line 214
# Scans +io+ line by line and fills +index+ with one record per line matching
# "Sequence : <identifier> (", keyed by the captured identifier.
#
# For each such line a fresh hash is stored under the identifier, its :aaseq
# is set from read_aaseq and read_segment_summary is run on it. When a
# "HEADER START STOP" line is seen, the structures that follow are read, the
# one chosen by top_structure supplies :best_structure_probability and
# :transmembrane_segments of the most recent record, and those segments are
# passed through add_sequences_to_segments and segment_arrays_to_hashes.
#
# The helpers read from the same +io+, so they advance the stream that the
# outer loop is iterating over.
#
# io::    object responding to +each+ (and to whatever the helpers call on it)
# index:: hash to fill; defaults to a new empty hash
#
# Returns +index+.
def to_index(io, index={})
  record = nil

  io.each do |line|
    case line
    when /^Sequence : (.*?) +\(/
      identifier = Regexp.last_match(1).dup
      record = index[identifier] = {}
      record[:aaseq] = read_aaseq(io)
      read_segment_summary(io, record)
    when /^HEADER\s+START\s+STOP/
      best = top_structure(read_structures(io))
      record[:best_structure_probability] = best[:probability]
      segments = record[:transmembrane_segments] = best[:tm]
      add_sequences_to_segments(segments, record[:aaseq])
      segment_arrays_to_hashes(segments)
    end
  end

  index
end

Private Instance Methods

num_certain_putative(fh) click to toggle source

Returns [certain, putative]. Expects the first line read to be a transmembrane segment line.

# File lib/transmembrane/toppred.rb, line 317
# Counts the lines of +fh+ whose last whitespace-separated field is
# 'Certain' or 'Putative'.
#
# Reading stops at the first line that has no fields at all (for example a
# blank line); that line is consumed. Lines ending in any other word are
# skipped without being counted.
#
# Returns [certain, putative].
def num_certain_putative(fh)
  tally = { 'Certain' => 0, 'Putative' => 0 }
  fh.each do |line|
    label = line.chomp.split(/\s+/).last
    break unless label
    tally[label] += 1 if tally.key?(label)
  end
  tally.values_at('Certain', 'Putative')
end
read_aaseq(fh) click to toggle source
# File lib/transmembrane/toppred.rb, line 293
# Concatenates consecutive lines from +fh+ into one string, with line endings
# removed.
#
# Reading stops at the first line that contains neither a word character nor
# a '*'; that line is consumed and not included in the result.
#
# Returns the accumulated String (empty if the first line already stops).
def read_aaseq(fh)
  sequence = ''
  fh.each do |row|
    row.chomp!
    break if row !~ /[\w\*]/
    sequence << row
  end
  sequence
end
read_segment_summary(fh, rec) click to toggle source
# File lib/transmembrane/toppred.rb, line 332
# Reads the segment summary from +fh+ into +rec+.
#
# A line matching "Found: <n> segments" sets rec[:num_found] to n (via to_i);
# when n is 0 reading stops there. Otherwise reading continues until a line
# matching "Helix Begin" is found, after which num_certain_putative(fh)
# supplies rec[:num_certain_transmembrane_segments] and
# rec[:num_putative_transmembrane_segments], and reading stops.
def read_segment_summary(fh, rec)
  fh.each do |line|
    case line
    when /Found: (.*?) segments/
      found = Regexp.last_match(1).to_i
      rec[:num_found] = found
      break if found == 0
    when /Helix\s+Begin/
      rec[:num_certain_transmembrane_segments],
        rec[:num_putative_transmembrane_segments] = num_certain_putative(fh)
      break
    end
  end
end
read_segments(fh) click to toggle source

Returns an array of arrays of transmembrane segments, each of the form [prob(Float), start(Int), stop(Int)]. Returns after seeing ‘//’.

# File lib/transmembrane/toppred.rb, line 266
# Collects transmembrane segments from +fh+ until a line containing '//' is
# read (that line is consumed).
#
# Every line starting with "TRANSMEM" is split on whitespace; fields 2 and 3
# are taken as start and stop (to_i) and field 5 as the probability (to_f).
# Missing fields therefore come out as 0 / 0.0. All other lines are ignored.
#
# Returns an array of [prob(Float), start(Int), stop(Int)] arrays.
def read_segments(fh)
  collected = []
  fh.each do |line|
    case line
    when /^TRANSMEM/
      fields = line.split(/\s+/)
      collected << [fields[4].to_f, fields[1].to_i, fields[2].to_i]
    when %r{//}
      break
    end
  end
  collected
end
read_structure(fh) click to toggle source

Returns a hash with two keys: :probability, and :tm, which contains an array of arrays of the form [prob(Float), start(Int), stop(Int)].

# File lib/transmembrane/toppred.rb, line 254
# Reads one structure from +fh+.
#
# The next line is consumed with readline (so EOFError propagates at end of
# input); its third whitespace-separated field, converted with to_f, becomes
# :probability. The lines that follow are handed to read_segments for :tm.
#
# Returns { :probability => Float, :tm => <result of read_segments> }.
def read_structure(fh)
  first_line_fields = fh.readline.split(/\s+/)
  {
    :probability => first_line_fields[2].to_f,
    :tm => read_segments(fh)
  }
end
read_structures(fh) click to toggle source

returns a list of all structures given a filehandle starting just after the first “HEADER START STOP …” line

# File lib/transmembrane/toppred.rb, line 239
# Reads consecutive structures from +fh+, which is expected to be positioned
# just after a "HEADER START STOP" line.
#
# One structure is always read via read_structure. After each structure, if
# input remains, the next line is consumed: when it is another
# "HEADER START STOP" line a further structure is read, otherwise reading
# stops (that non-header line is not put back).
#
# Returns the array of structures in the order read.
def read_structures(fh)
  found = [read_structure(fh)]
  until fh.eof?
    break unless fh.readline =~ /^HEADER\s+START\s+STOP/
    found << read_structure(fh)
  end
  found
end
segment_arrays_to_hashes(list) click to toggle source
# File lib/transmembrane/toppred.rb, line 305
# Replaces, in place, every segment array in +list+ with a hash built from
# its first four elements: :probability, :start, :stop and :aaseq (in that
# order). Elements that are absent become nil.
#
# Returns +list+ itself.
def segment_arrays_to_hashes(list)
  list.map! do |segment|
    probability, first_pos, last_pos, residues = segment.values_at(0, 1, 2, 3)
    { :probability => probability, :start => first_pos, :stop => last_pos, :aaseq => residues }
  end
end
top_structure(list) click to toggle source

returns the top probability structure (first on tie)

# File lib/transmembrane/toppred.rb, line 281
# Picks the element of +list+ with the highest :probability.
#
# The first element starts as the leader and is only displaced by an element
# whose :probability is strictly greater, so on a tie the earliest one wins.
# +list+ must not be empty: the first element is dereferenced unconditionally.
#
# Returns the chosen element (the same object that was in +list+).
def top_structure(list)
  leader = list.first
  leader_prob = leader[:probability]
  list.each do |candidate|
    next unless candidate[:probability] > leader_prob
    leader = candidate
    leader_prob = candidate[:probability]
  end
  leader
end