class BlastTableResult
Extracts results from a BLAST tabular-output file and uses them to create instances of “BlastQuery” and “BlastHit”.
Public Class Methods
new(input)
click to toggle source
Parser initialization
Calls superclass method
BlastResult::new
# File lib/scbi_blast/blast_table_result.rb, line 31
#
# Parser initialization.
#
# Forwards +input+ to the superclass initializer, then parses it unless it
# is empty.
#
# input - either an Array of report lines, or anything File.open accepts as
#         a path to a file holding the report. It must respond to #empty?.
#
# Raises whatever File.open raises when the path cannot be opened, and
# whatever #parse raises for a malformed report.
def initialize(input)
  super(input)
  return if input.empty?

  lines =
    if input.is_a?(Array)
      input
    else
      # Block form guarantees the handle is closed even if reading fails.
      File.open(input, 'r') { |fich| fich.readlines }
    end

  parse(lines)
end
Public Instance Methods
parse(lines)
click to toggle source
# File lib/scbi_blast/blast_table_result.rb, line 50
#
# Parses the lines of a BLAST tabular report and fills @querys with
# BlastQuery objects, each one holding the BlastHit objects built from its
# data rows.
#
# Comment lines (optional leading whitespace followed by '#') are only
# inspected for two markers:
#   "# Query: <name>"  remembers <name> as the current query
#   "# 0 hits found"   pushes an empty BlastQuery for the current query
# Every other non-blank line is treated as a tab-separated data row.
#
# Data rows are expected to carry these columns, in this order (BLAST+
# -outfmt 6/7 format specifiers):
#
#    1. qseqid    Query Seq-id
#    2. sacc      Subject accession
#    3. pident    Percentage of identical matches
#    4. length    Alignment length
#    5. mismatch  Number of mismatches
#    6. gapopen   Number of gap openings
#    7. qstart    Start of alignment in query
#    8. qend      End of alignment in query
#    9. sstart    Start of alignment in subject
#   10. send      End of alignment in subject
#   11. evalue    Expect value
#   12. bitscore  Bit score
#   13. score     Raw score
#   14. qframe    Query frame
#   15. sframe    Subject frame
#   16. qseq      Aligned part of query sequence
#   17. sseq      Aligned part of subject sequence
#   18. qlen      Query length
#   19. slen      Subject length
#   20. stitle    Subject title
#
# The first twelve columns are BLAST's default layout
# ('qseqid sseqid pident length mismatch gapopen qstart qend sstart send
# evalue bitscore', i.e. the keyword 'std', also produced by blastall with
# -m 8 / -m 9). Columns missing at the end of a row are left as nil.
#
# lines - Array of Strings, one report line each (line terminators allowed).
#         The strings are not modified.
#
# Returns the +lines+ argument.
# Raises RuntimeError when the report starts with a comment line but its
# last line is not the "# BLAST processed N queries" trailer.
def parse(lines)
  # Nothing to parse; also avoids calling String methods on nil below.
  return lines if lines.empty?

  # A report written with comments (format 7) must end with the trailer,
  # otherwise BLAST did not finish processing the queries.
  if lines.first.start_with?('#') && !(lines.last =~ /# BLAST processed (\d+) queries/)
    raise "Blast didn't processed your queries"
  end

  query_name = ''

  lines.each do |line|
    # Non-destructive chomp: the caller's strings may be frozen or reused.
    row = line.chomp

    # Blank lines carry no hit; without this they would yield an all-nil hit
    # attached to a query named nil.
    next if row.strip.empty?

    if row =~ /^\s*#/
      if row =~ /^#\sQuery:\s+(.+)$/
        query_name = $1
      elsif row =~ /^#\s0\shits\sfound$/
        @querys.push BlastQuery.new(query_name)
      end
      next
    end

    # NOTE(review): /\t+/ collapses consecutive tabs, so an empty column in
    # the middle of a row shifts the following columns left - confirm that
    # BLAST never emits empty fields for the configured format.
    qseqid, sacc, pident, align_len, mismatch, gapopen,
      qstart, qend, sstart, s_end, evalue, bitscore, score,
      qframe, sframe, qseq, sseq, qlen, slen, stitle = row.split(/\t+/)

    hit = BlastHit.new(qstart, qend, sstart, s_end)
    hit.align_len = align_len
    hit.ident = pident
    hit.gaps = gapopen # gap openings, not the total number of gaps
    hit.mismatches = mismatch
    hit.e_val = evalue
    hit.bit_score = bitscore
    hit.score = score
    hit.q_frame = qframe
    hit.s_frame = sframe
    hit.subject_id = sacc
    hit.full_subject_length = slen
    hit.definition = stitle
    hit.acc = sacc
    hit.q_seq = qseq
    hit.s_seq = sseq
    hit.q_len = qlen
    hit.s_len = slen

    # Attach the hit to the query already registered under this id, or
    # register a new query for it.
    query = find_query(@querys, qseqid)
    unless query
      query = BlastQuery.new(qseqid)
      @querys.push query
    end
    query.add_hit(hit)
    query.full_query_length = qlen
  end
end