class Bio::GCG::Msf
MSF is a multiple sequence alignment format from the Wisconsin Package (GCG). Bio::GCG::Msf
is a parser for the MSF format.
Constants
- DELIMITER
Delimiter used by Bio::FlatFile.
Attributes
checksum[R]
checksum
date[R]
date
description[R]
description
entry_id[R]
ID of the alignment
heading[R]
heading line (for example '!!NA_MULTIPLE_ALIGNMENT 1.0')
length[R]
alignment length
seq_type[R]
sequence type ('N' for DNA/RNA or 'P' for protein)
Public Class Methods
new(str)
click to toggle source
Creates a new Msf object.
# File lib/bio/appl/gcg/msf.rb, line 31
# Creates a new Msf object from the text of one MSF entry.
#
# The text is split at the first line that consists solely of "//".
# Everything before it is the header; everything after it is stored
# unparsed in @data and only interpreted later by do_parse.
#
# From the header this sets:
# * @heading     - the leading "!!.._MULTIPLE_ALIGNMENT ..." line, or nil
# * @description - the text preceding the line that ends in ".."
# * @entry_id, @length, @seq_type, @date, @checksum - fields of the
#   "MSF:" line (left unset when that line cannot be recognised)
# * @seq_info    - one Hash of "Key: value" pairs per "Name:" line
#
# str:: String containing the entry. Empty input is accepted and yields
#       an object with an empty description and no sequence information.
def initialize(str)
  str = str.sub(/\A[\r\n]+/, '')
  preamble, @data = str.split(/^\/\/$/, 2)
  # String#split yields no elements at all for empty input.
  preamble ||= String.new

  # slice! removes the matched text and returns it (nil when no match).
  @heading = preamble.slice!(/\A\!\![A-Z]+\_MULTIPLE\_ALIGNMENT.*/)
  # '!!NA_MULTIPLE_ALIGNMENT 1.0' or like this

  # Everything up to the last line ending in ".." is description plus the
  # "MSF:" line; what stays in preamble are the "Name:" lines.
  header = preamble.slice!(/.*\.\.\s*$/m).to_s
  msf_line_pattern = /^.*\.\.\s*$/
  msf_line = header[msf_line_pattern].to_s
  @description = header.sub(msf_line_pattern, '').sub(/\A(\r\n|\r|\n)/, '')

  m = /^(?:(.+)\s+)?MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(msf_line)
  if m
    @entry_id = m[1].to_s.strip
    @length   = m[2].to_i
    @seq_type = m[3]
    @date     = m[4].to_s.strip
    @checksum = m[6].to_i
  end

  @seq_info = []
  preamble.each_line do |line|
    next unless line.include?('Name: ')
    fields = {}
    line.scan(/(\S+)\: +(\S*)/) { |key, value| fields[key] = value }
    @seq_info << fields
  end

  @align = nil
end
Public Instance Methods
alignment()
click to toggle source
Returns a Bio::Alignment object.
# File lib/bio/appl/gcg/msf.rb, line 176
# Returns the alignment stored in @align, running do_parse first so that
# the alignment body is parsed if it has not been already.
def alignment
  do_parse
  return @align
end
compcheck()
click to toggle source
CompCheck field
# File lib/bio/appl/gcg/msf.rb, line 118
# Returns the number following "CompCheck:" in the description as an
# Integer, or nil when the description has no such field.
# The value is looked up on the first call and cached in @compcheck.
def compcheck
  unless defined?(@compcheck)
    # String#[regexp, 1] returns the first capture (or nil) without going
    # through the $1 global.
    digits = @description.to_s[/CompCheck\: +(\d+)/, 1]
    @compcheck = digits ? digits.to_i : nil
  end
  @compcheck
end
gap_length_weight()
click to toggle source
gap length weight
# File lib/bio/appl/gcg/msf.rb, line 109
# Returns the text following "GapLengthWeight:" in the description as a
# String, or nil when the description has no such field.
# The value is looked up on the first call and cached.
def gap_length_weight
  unless defined?(@gap_length_weight)
    # String#[regexp, 1] returns the first capture (or nil) without going
    # through the $1 global.
    @gap_length_weight = @description.to_s[/GapLengthWeight\: +(\S+)/, 1]
  end
  @gap_length_weight
end
gap_weight()
click to toggle source
gap weight
# File lib/bio/appl/gcg/msf.rb, line 100
# Returns the text following "GapWeight:" in the description as a String,
# or nil when the description has no such field.
# The value is looked up on the first call and cached.
def gap_weight
  unless defined?(@gap_weight)
    # String#[regexp, 1] returns the first capture (or nil) without going
    # through the $1 global.
    @gap_weight = @description.to_s[/GapWeight\: +(\S+)/, 1]
  end
  @gap_weight
end
seq_data()
click to toggle source
gets seq data (used internally) (will be obsoleted)
# File lib/bio/appl/gcg/msf.rb, line 182
# Returns @seq_data after making sure do_parse has run.
# Used internally; marked in the documentation as going to be obsoleted.
def seq_data
  do_parse
  return @seq_data
end
symbol_comparison_table()
click to toggle source
symbol comparison table
# File lib/bio/appl/gcg/msf.rb, line 91
# Returns the text following "Symbol comparison table:" in the description
# as a String, or nil when the description has no such field.
# The value is looked up on the first call and cached.
def symbol_comparison_table
  unless defined?(@symbol_comparison_table)
    # String#[regexp, 1] returns the first capture (or nil) without going
    # through the $1 global.
    @symbol_comparison_table =
      @description.to_s[/Symbol comparison table\: +(\S+)/, 1]
  end
  @symbol_comparison_table
end
validate_checksum()
click to toggle source
validates checksum
# File lib/bio/appl/gcg/msf.rb, line 188
# Validates the checksums of the entry.
#
# Each parsed sequence is passed to Bio::GCG::Seq.calc_checksum and the
# result is compared with the integer value of the 'Check' field of the
# corresponding "Name:" line. Unless @checksum is 0, the sum of the
# per-sequence checksums modulo 10000 must also equal @checksum.
#
# Returns true when all checks pass, false otherwise.
def validate_checksum
  do_parse
  total = 0
  @seq_data.each_with_index do |seq, idx|
    sum = Bio::GCG::Seq.calc_checksum(seq)
    return false if sum != @seq_info[idx]['Check'].to_i
    total += sum
  end
  # "Check:" field of BioPerl is always 0
  return true if @checksum == 0
  (total % 10000) == @checksum
end
Private Instance Methods
do_parse()
click to toggle source
parsing
# File lib/bio/appl/gcg/msf.rb, line 130
# Parses the alignment body kept in @data and stores the result in
# @seq_data (one sequence object per "Name:" entry) and @align.
# Does nothing when @align is already set.
#
# The body is cut into chunks at blank lines. In each chunk the n-th line
# is appended to the n-th sequence. When a chunk has more lines than there
# are sequences, its first line is dropped and the number of its leading
# spaces is used as the width of the name column to cut from every row;
# otherwise the name is removed by splitting each row at the first run of
# spaces.
def do_parse
  return if @align

  chunks = @data.split(/\r?\n\r?\n/)
  @seq_data = Array.new(@seq_info.size) { [] }

  chunks.each do |chunk|
    next if chunk.strip.empty?
    rows = chunk.sub(/\A[\r\n]+/, '').split(/[\r\n]+/)

    name_width = 0
    if rows.size > @seq_info.size
      leading_spaces = rows.shift.to_s[/^ +/]
      name_width = leading_spaces.length if leading_spaces
    end

    rows.each_with_index do |row, idx|
      if name_width > 0
        row[0, name_width] = ''
        @seq_data[idx] << row
      else
        @seq_data[idx] << row.strip.split(/ +/, 2)[1].to_s
      end
    end
  end

  # Pick the sequence class from the "Type:" field of the header.
  seq_class =
    case seq_type
    when 'P', 'p' then Bio::Sequence::AA
    when 'N', 'n' then Bio::Sequence::NA
    else Bio::Sequence::Generic
    end

  # Join the pieces of each sequence and drop whitespace and digits.
  @seq_data.map! do |pieces|
    seq_class.new(pieces.join('').gsub(/[\s\d]+/, ''))
  end

  aln = Bio::Alignment.new
  @seq_data.each_with_index do |seq, idx|
    aln.store(@seq_info[idx]['Name'], seq)
  end
  @align = aln
end