class TopPred::Parser_Text
Public Instance Methods
to_index(io, index={})
click to toggle source
Returns a hash structure in this form: {identifier => {aaseq => String, num_found => Int, num_certain_transmembrane_segments => Int, num_putative_transmembrane_segments => Int, best_structure_probability => Float, transmembrane_segments => [{probability => Float, start => Int, stop => Int, aaseq => String}, ...]}}
# File lib/transmembrane/toppred.rb, line 214
# Reads TopPred text output from +io+ and fills +index+ with one record per
# sequence, keyed by the identifier captured from each "Sequence : ..." line.
#
# A "Sequence :" line opens a new record: its :aaseq comes from read_aaseq
# and its segment counts from read_segment_summary.  A "HEADER START STOP"
# line passes the stream to read_structures; the structure selected by
# top_structure supplies :best_structure_probability and
# :transmembrane_segments, and the segments are then run through
# add_sequences_to_segments and segment_arrays_to_hashes.
#
# Returns +index+.
def to_index(io, index={})
  record = nil
  io.each do |line|
    case line
    when /^Sequence : (.*?) +\(/
      identifier = Regexp.last_match(1).dup
      record = index[identifier] = {}
      record[:aaseq] = read_aaseq(io)
      read_segment_summary(io, record)
    when /^HEADER\s+START\s+STOP/
      best = top_structure(read_structures(io))
      segments = best[:tm]
      record[:best_structure_probability] = best[:probability]
      record[:transmembrane_segments] = segments
      add_sequences_to_segments(segments, record[:aaseq])
      segment_arrays_to_hashes(segments)
    end
  end
  index
end
Private Instance Methods
num_certain_putative(fh)
click to toggle source
Returns [certain, putative]. Expects the first line to be a TM segment.
# File lib/transmembrane/toppred.rb, line 317
# Counts how many lines read from +fh+ end in the word 'Certain' and how many
# end in 'Putative'.  Reading stops at the first line that has no fields
# (that line is consumed) or when +fh+ runs out; lines ending in any other
# word are read but not counted.
#
# Returns [certain, putative].
def num_certain_putative(fh)
  tally = { 'Certain' => 0, 'Putative' => 0 }
  fh.each do |line|
    label = line.chomp.split(/\s+/).last
    break unless label
    tally[label] += 1 if tally.key?(label)
  end
  tally.values_at('Certain', 'Putative')
end
read_aaseq(fh)
click to toggle source
# File lib/transmembrane/toppred.rb, line 293
# Concatenates consecutive lines from +fh+ (line endings removed) into one
# string.  Reading stops at the first line that contains neither a word
# character nor '*' (that line is consumed), or when +fh+ is exhausted.
#
# Returns the accumulated String; empty when no matching line was read.
def read_aaseq(fh)
  # ''.dup yields a mutable buffer even when string literals are frozen.
  aaseq = ''.dup
  fh.each do |line|
    # Non-destructive chomp: the yielded line may be frozen and is not ours
    # to modify.
    residues = line.chomp
    break unless residues =~ /[\w\*]/
    aaseq << residues
  end
  aaseq
end
read_segment_summary(fh, rec)
click to toggle source
# File lib/transmembrane/toppred.rb, line 332
# Scans +fh+ for the segment summary and records it in +rec+.
#
# A "Found: N segments" line sets rec[:num_found]; when N is 0 scanning stops
# there.  A line matching "Helix  Begin" hands the stream to
# num_certain_putative, whose two counts are stored under
# :num_certain_transmembrane_segments and
# :num_putative_transmembrane_segments, after which scanning stops.
def read_segment_summary(fh, rec)
  fh.each do |line|
    case line
    when /Found: (.*?) segments/
      found = Regexp.last_match(1).to_i
      rec[:num_found] = found
      break if found.zero?
    when /Helix\s+Begin/
      certain, putative = num_certain_putative(fh)
      rec[:num_certain_transmembrane_segments] = certain
      rec[:num_putative_transmembrane_segments] = putative
      break
    end
  end
end
read_segments(fh)
click to toggle source
Returns an array of arrays of transmembrane segments, each of the form [prob(Float), start(Int), stop(Int)]. Returns after seeing ‘//’.
# File lib/transmembrane/toppred.rb, line 266
# Collects the TRANSMEM rows of one structure from +fh+.
#
# Each line starting with "TRANSMEM" is split on whitespace; fields 1, 2 and
# 4 (start, stop, probability) are converted and stored as
# [prob(Float), start(Int), stop(Int)].  Reading stops at the first other
# line that contains '//' (that line is consumed) or when +fh+ is exhausted.
#
# Returns the array of segment arrays (empty when none were seen).
def read_segments(fh)
  segments = []
  fh.each do |line|
    if line =~ /^TRANSMEM/
      # Underscore-prefixed names mark fields that are parsed but unused.
      _header, start, stop, _len, prob = line.split(/\s+/)[0, 5]
      segments << [prob.to_f, start.to_i, stop.to_i]
    elsif line =~ %r{//}
      break
    end
  end
  segments
end
read_structure(fh)
click to toggle source
Returns a hash with keys :probability and :tm; :tm contains an array of arrays of the form [prob(Float), start(Int), stop(Int)].
# File lib/transmembrane/toppred.rb, line 254
# Reads one structure from +fh+.
#
# The next line is taken with readline and its third whitespace-separated
# field becomes :probability (Float); the lines that follow are handed to
# read_segments and its result becomes :tm.
#
# Returns a hash with the keys :probability and :tm.
def read_structure(fh)
  summary = fh.readline
  probability = summary.split(/\s+/)[2].to_f
  { :probability => probability, :tm => read_segments(fh) }
end
read_structures(fh)
click to toggle source
returns a list of all structures given a filehandle starting just after the first “HEADER START STOP …” line
# File lib/transmembrane/toppred.rb, line 239
# Reads consecutive structures from +fh+ via read_structure.
#
# After each structure the next line is read; collection continues only while
# that line is another "HEADER START STOP" line and +fh+ is not at EOF.
#
# Returns the array of structures (always at least one element).
def read_structures(fh)
  structures = []
  loop do
    structures << read_structure(fh)
    # Stop at end of input, or once the following line is not a new header.
    break if fh.eof? || fh.readline !~ /^HEADER\s+START\s+STOP/
  end
  structures
end
segment_arrays_to_hashes(list)
click to toggle source
# File lib/transmembrane/toppred.rb, line 305
# Replaces, in place, every element of +list+ with a hash built from its
# first four positions: :probability, :start, :stop and :aaseq (in that
# order).  Positions the element does not have map to nil.
#
# Returns +list+ itself.
def segment_arrays_to_hashes(list)
  keys = [:probability, :start, :stop, :aaseq]
  list.map! do |segment|
    keys.each_with_index.each_with_object({}) do |(key, position), hash|
      hash[key] = segment[position]
    end
  end
end
top_structure(list)
click to toggle source
returns the top probability structure (first on tie)
# File lib/transmembrane/toppred.rb, line 281
# Picks the structure with the highest :probability from +list+.
#
# Only a strictly greater probability replaces the current best, so the
# earliest structure wins a tie.
#
# Returns the winning element of +list+, or nil when +list+ is empty.
def top_structure(list)
  # An empty list has no best structure; say so instead of failing on nil.
  return nil if list.empty?

  best = list.first
  list.each do |candidate|
    best = candidate if candidate[:probability] > best[:probability]
  end
  best
end