class TopPred::Parser_XML::DOM

Public Instance Methods

to_index(io, index = {}) click to toggle source

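Parses the TopPred XML read from io and returns an index: a hash mapping each sequence id to a hash of that sequence's attributes.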

# File lib/transmembrane/toppred.rb, line 152
# Builds an index of TopPred results from the XML document read from +io+.
#
# The root element must be <toppreds>; each <toppred> child contributes one
# entry, keyed by the 'id' attribute of its <sequence> element. Each entry is
# a hash with:
#   :aaseq     - the <sequence> text with all whitespace removed
#   :num_found - the 'segments' attribute of <tmsummary>, as an integer
# and, only when :num_found > 0:
#   :num_certain_transmembrane_segments  - <segment> children typed 'certain'
#   :num_putative_transmembrane_segments - all remaining <segment> children
#   :transmembrane_segments - array of hashes (:start, :stop, :probability)
#       built from the <tmsegment> children of the <topology> with the
#       highest 'prob', after being handed to add_sequences_to_segments
#
# io::    source passed straight to get_root_node_from_io
# index:: hash to populate in place; defaults to a new empty hash
#
# Returns +index+. A failed structural check terminates the process through
# Kernel#abort, with a message naming the check that failed.
def to_index(io, index = {})
  get_root_node_from_io(io) do |toppreds_n|
    unless toppreds_n.name == 'toppreds'
      abort "expected root element 'toppreds' but found '#{toppreds_n.name}'"
    end

    toppreds_n.find('child::toppred').each do |toppred_n|
      att_hash = {}
      sequence_n = toppred_n.find_first('child::sequence')
      seq_id = sequence_n['id']
      index[seq_id] = att_hash

      # \s already matches newlines, so a single class strips all whitespace.
      aaseq = sequence_n.content.gsub(/\s/, '')
      att_hash[:aaseq] = aaseq
      declared_size = sequence_n['size'].to_i
      if aaseq.size != declared_size
        abort "sequence '#{seq_id}': parsed #{aaseq.size} residues but the size attribute is #{declared_size}"
      end

      tmsummary_n = sequence_n.find_first('following-sibling::tmsummary')
      num_found = tmsummary_n['segments'].to_i
      att_hash[:num_found] = num_found
      # Nothing further is recorded for sequences without predicted segments.
      next unless num_found > 0

      segment_ns = tmsummary_n.find('child::segment').to_a
      num_certain = segment_ns.count { |segment_n| segment_n['type'] == 'certain' }
      # Every segment whose type is not 'certain' is counted as putative.
      att_hash[:num_putative_transmembrane_segments] = segment_ns.size - num_certain
      att_hash[:num_certain_transmembrane_segments] = num_certain

      # Looked up by name, the same way <tmsummary> is found above, instead of
      # taking the immediate next sibling: the immediate sibling may be some
      # other node (e.g. a whitespace text node), which made the check fail.
      topologies_n = tmsummary_n.find_first('following-sibling::topologies')
      unless topologies_n
        abort "sequence '#{seq_id}': no 'topologies' element follows 'tmsummary'"
      end

      # get the top probability topology:
      top_prob_topology_n = topologies_n.find('child::topology').to_a.max_by do |topology_n|
        topology_n['prob'].to_f
      end

      tmsegments = top_prob_topology_n.find('child::tmsegment').to_a.map do |tmsegment_n|
        {
          :start => tmsegment_n['start'].to_i,
          :stop => tmsegment_n['stop'].to_i,
          ## WARNING! it appears the probability is broken on xml output!!
          :probability => tmsegment_n['prob'].to_f
        }
      end
      add_sequences_to_segments(tmsegments, aaseq)
      att_hash[:transmembrane_segments] = tmsegments
    end
  end
  index
end