class Bio::HMMER::Report
A parser class for a search report by hmmsearch or hmmpfam program in the HMMER
package.
Examples¶ ↑
Examples #for multiple reports in a single output file (example.hmmpfam) Bio::HMMER.reports(File.read("example.hmmpfam")) do |report| report.program['name'] report.parameter['HMM file'] report.query_info['Query sequence'] report.hits.each do |hit| hit.accession hit.description hit.score hit.evalue hit.hsps.each do |hsp| hsp.accession hsp.domain hsp.evalue hsp.midline end end
References
¶ ↑
Constants
- DELIMITER
Delimiter of each entry for
Bio::FlatFile
support.
Attributes
statistics by hmmsearch. Keys are 'Total memory', 'Satisfying E cutoff' and 'Total hits'.
statistics by hmmsearch.
Returns an Array of Bio::HMMER::Report::Hsp
objects. Under special circumstances, some HSPs do not have parent Hit
objects. If you want to access such HSPs, use this method.
A hash contains parameters used. Valid keys are 'HMM file' and 'Sequence file'.
A Hash contains program information used. Valid keys are 'name', 'version', 'copyright' and 'license'.
A hash contains the query information. Valid keys are 'query sequence', 'Accession' and 'Description'.
statistics by hmmsearch. Keys are 'mu', 'lambda', 'chi-sq statistic' and 'P(chi-square)'.
statistics by hmmsearch.
statistics by hmmsearch. Keys are 'Total memory', 'Satisfying E cutoff' and 'Total hits'.
Public Class Methods
Parses a HMMER
search report (by hmmpfam or hmmsearch program) and reutrns a Bio::HMMER::Report
object.
Examples¶ ↑
hmmpfam_report = Bio::HMMER::Report.new(File.read("hmmpfam.out")) hmmsearch_report = Bio::HMMER::Report.new(File.read("hmmsearch.out"))
# File lib/bio/appl/hmmer/report.rb 156 def initialize(data) 157 158 # The input data is divided into six data fields, i.e. header, 159 # query infomation, hits, HSPs, alignments and search statistics. 160 # However, header and statistics data don't necessarily exist. 161 subdata, is_hmmsearch = get_subdata(data) 162 163 # if header exists, parse it 164 if subdata["header"] 165 @program, @parameter = parse_header_data(subdata["header"]) 166 else 167 @program, @parameter = [{}, {}] 168 end 169 170 @query_info = parse_query_info(subdata["query"]) 171 @hits = parse_hit_data(subdata["hit"]) 172 @hsps = parse_hsp_data(subdata["hsp"], is_hmmsearch) 173 174 if @hsps != [] 175 # split alignment subdata into an array of alignments 176 aln_ary = subdata["alignment"].split(/^\S+.*?\n/).slice(1..-1) 177 178 # append alignment information to corresponding Hsp 179 aln_ary.each_with_index do |aln, i| 180 @hsps[i].set_alignment(aln) 181 end 182 end 183 184 # assign each Hsp object to its parent Hit 185 hits_hash = {} 186 @hits.each do |hit| 187 hits_hash[hit.accession] = hit 188 end 189 @hsps.each do |hsp| 190 if hits_hash.has_key?(hsp.accession) 191 hits_hash[hsp.accession].append_hsp(hsp) 192 end 193 end 194 195 # parse statistics (for hmmsearch) 196 if is_hmmsearch 197 @histogram, @statistical_detail, @total_seq_searched, \ 198 @whole_seq_top_hits, @domain_top_hits = \ 199 parse_stat_data(subdata["statistics"]) 200 end 201 202 end
Public Instance Methods
Iterates each hit (Bio::HMMER::Report::Hit
).
# File lib/bio/appl/hmmer/report.rb 206 def each 207 @hits.each do |hit| 208 yield hit 209 end 210 end
Private Instance Methods
Bio::HMMER::Report#get_subdata
# File lib/bio/appl/hmmer/report.rb 215 def get_subdata(data) 216 subdata = {} 217 header_prefix = '\Ahmm(search|pfam) - search' 218 query_prefix = '^Query (HMM|sequence): .*\nAccession: ' 219 hit_prefix = '^Scores for (complete sequences|sequence family)' 220 hsp_prefix = '^Parsed for domains:' 221 aln_prefix = '^Alignments of top-scoring domains:\n' 222 stat_prefix = '^\nHistogram of all scores:' 223 224 # if header exists, get it 225 if data =~ /#{header_prefix}/ 226 is_hmmsearch = ($1 == "search") # hmmsearch or hmmpfam 227 subdata["header"] = data[/(\A.+?)(?=#{query_prefix})/m] 228 else 229 is_hmmsearch = false # if no header, assumed to be hmmpfam 230 end 231 232 # get query, Hit and Hsp data 233 subdata["query"] = data[/(#{query_prefix}.+?)(?=#{hit_prefix})/m] 234 subdata["hit"] = data[/(#{hit_prefix}.+?)(?=#{hsp_prefix})/m] 235 subdata["hsp"] = data[/(#{hsp_prefix}.+?)(?=#{aln_prefix})/m] 236 237 # get alignment data 238 if is_hmmsearch 239 data =~ /#{aln_prefix}(.+?)#{stat_prefix}/m 240 subdata["alignment"] = $1 241 else 242 data =~ /#{aln_prefix}(.+?)\/\/\n/m 243 subdata["alignment"] = $1 244 raise "multiple reports found" if $'.length > 0 245 end 246 247 # handle -A option of HMMER 248 cutoff_line = '\t\[output cut off at A = \d+ top alignments\]\n\z' 249 subdata["alignment"].sub!(/#{cutoff_line}/, '') 250 251 # get statistics data 252 subdata["statistics"] = data[/(#{stat_prefix}.+)\z/m] 253 254 [subdata, is_hmmsearch] 255 end
Bio::HMMER::Report#parse_header_data
# File lib/bio/appl/hmmer/report.rb 260 def parse_header_data(data) 261 data =~ /\A(.+? - - -$\n)(.+? - - -$\n)\n\z/m 262 program_data = $1 263 parameter_data = $2 264 265 program = {} 266 program['name'], program['version'], program['copyright'], \ 267 program['license'] = program_data.split(/\n/) 268 269 parameter = {} 270 parameter_data.each_line do |x| 271 if /^(.+?):\s+(.*?)\s*$/ =~ x 272 parameter[$1] = $2 273 end 274 end 275 276 [program, parameter] 277 end
Bio::HMMER::Report#parse_hit_data
# File lib/bio/appl/hmmer/report.rb 297 def parse_hit_data(data) 298 data.sub!(/.+?---\n/m, '').chop! 299 hits = [] 300 return hits if data == "\t[no hits above thresholds]\n" 301 data.each_line do |l| 302 hits.push(Hit.new(l)) 303 end 304 hits 305 end
Bio::HMMER::Report#parse_hsp_data
# File lib/bio/appl/hmmer/report.rb 310 def parse_hsp_data(data, is_hmmsearch) 311 data.sub!(/.+?---\n/m, '').chop! 312 hsps=[] 313 return hsps if data == "\t[no hits above thresholds]\n" 314 data.each_line do |l| 315 hsps.push(Hsp.new(l, is_hmmsearch)) 316 end 317 return hsps 318 end
Bio::HMMER::Report#parse_query_info
# File lib/bio/appl/hmmer/report.rb 282 def parse_query_info(data) 283 hash = {} 284 data.each_line do |x| 285 if /^(.+?):\s+(.*?)\s*$/ =~ x 286 hash[$1] = $2 287 elsif /\s+\[(.+)\]/ =~ x 288 hash['comments'] = $1 289 end 290 end 291 hash 292 end
Bio::HMMER::Report#parse_stat_data
# File lib/bio/appl/hmmer/report.rb 323 def parse_stat_data(data) 324 data.sub!(/\nHistogram of all scores:\n(.+?)\n\n\n%/m, '') 325 histogram = $1.strip 326 327 statistical_detail = {} 328 data.sub!(/(.+?)\n\n/m, '') 329 $1.each_line do |l| 330 statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l 331 end 332 333 total_seq_searched = nil 334 data.sub!(/(.+?)\n\n/m, '') 335 $1.each_line do |l| 336 total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l 337 end 338 339 whole_seq_top_hits = {} 340 data.sub!(/(.+?)\n\n/m, '') 341 $1.each_line do |l| 342 if /^\s*(.+?):\s*(\d+)\s*$/ =~ l 343 whole_seq_top_hits[$1] = $2.to_i 344 elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l 345 whole_seq_top_hits[$1] = $2 346 end 347 end 348 349 domain_top_hits = {} 350 data.each_line do |l| 351 if /^\s*(.+?):\s*(\d+)\s*$/ =~ l 352 domain_top_hits[$1] = $2.to_i 353 elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l 354 domain_top_hits[$1] = $2 355 end 356 end 357 358 [histogram, statistical_detail, total_seq_searched, \ 359 whole_seq_top_hits, domain_top_hits] 360 end