class MgNu::Parser::ClustalW
ClustalW
is the class used for parsing clustalw multiple alignment output.
Attributes
alignment[R]
buffer[RW]
file[R]
raw[RW]
Public Class Methods
new(input = nil, file = true)
click to toggle source
params [String] alignment file (*.aln) params [Boolean] is this a file (default is true), or a string? returns [MgNu::Alignment]
# File lib/mgnu/parser/clustalw.rb, line 11 def initialize(input = nil, file = true) if input if file if File.exists?(input) and File.readable?(input) @raw = File.read(input) end # end of exists and readable file checks else # file is false, so this must be a string with input @raw = input end @buffer = @raw.split(/\r?\n\r?\n/) @alignment = nil self.parse if @buffer.length == 0 puts "ClustalW alignment file #{input} did not parse!" exit(1); end else error("MgNu::Parser::ClustalW.new(): need an existing file") end end
Public Instance Methods
parse()
click to toggle source
process the input multiple alignement
# File lib/mgnu/parser/clustalw.rb, line 33 def parse if @alignment == nil header = @buffer.shift @buffer[0].gsub!(/^(\r?\n)+/, '') # drop newline at start of section @buffer.collect! { |section| section.split(/\r?\n/) } match_lines = [] # drop numbers if the alignment was run with "-SEQNOS=on" @buffer.each do |section| section.each { |line| line.sub!(/\s+\d+\s*$/, '') } match_lines << section.pop end # get the 1st position of a space from the right using # rindex. Increment this by 1 to get the seq_start seq_start = (@buffer[0][0].rindex(/\s/) || -1) + 1 # create ordered array of hashes with # seqname => sequence and create an array with a order of # sequences (seqname as value) order = Array.new h = Hash.new @buffer.each do |section| section.each do |line| name = line[0, seq_start].sub(/\s+\z/, '') sequence = line[seq_start..-1] if h.has_key?(name) h[name] += sequence else order << name h[name] = sequence end end end end @alignment = MgNu::Alignment.new(h, order) end