class BlastTableResult

Extracts results from a BLAST tabular-format output file and uses them to create instances of “BlastQuery” and “BlastHit”.

Public Class Methods

new(input) click to toggle source

Parser initialization

Calls superclass method BlastResult::new
# File lib/scbi_blast/blast_table_result.rb, line 31
# Parser initialization.
#
# Forwards +input+ to the superclass constructor and then, unless +input+
# is empty, hands its lines to #parse.
#
# input - either an Array of lines, used as given, or any other value,
#         which is passed to File.open as the path of the file to read.
def initialize(input)
  super(input)

  # Nothing to parse (e.g. an empty Array or an empty path String).
  return if input.empty?

  lines =
    if input.is_a?(Array)
      input
    else
      # The block form closes the file even when readlines raises, so the
      # descriptor cannot leak the way a manual open/close pair can.
      File.open(input, 'r') { |file| file.readlines }
    end

  parse(lines)
end

Public Instance Methods

parse(lines) click to toggle source
# File lib/scbi_blast/blast_table_result.rb, line 50
# Parses the lines of a BLAST tabular report (with or without '#' comment
# lines) and fills @querys with BlastQuery objects holding their BlastHit
# objects.
#
# lines - Array of Strings, one per report line. The strings are not
#         modified, so frozen strings are accepted.
#
# Comment lines (first non-blank character is '#'):
#   "# Query: NAME"  remembers NAME as the current query name.
#   "# 0 hits found" pushes an empty BlastQuery for the current query name.
#   Any other comment line is ignored.
#
# Every other non-blank line is a data row. It is split on tabs and read
# positionally, in this order:
#
#   qseqid   Query Seq-id                  sacc     Subject accession
#   pident   Percentage of identical       length   Alignment length
#            matches
#   mismatch Number of mismatches          gapopen  Number of gap openings
#   qstart   Start of alignment in query   qend     End of alignment in query
#   sstart   Start of alignment in subject send     End of alignment in subject
#   evalue   Expect value                  bitscore Bit score
#   score    Raw score                     qframe   Query frame
#   sframe   Subject frame                 qseq     Aligned part of query sequence
#   sseq     Aligned part of subject seq.  qlen, slen, stitle
#
# The first twelve columns are BLAST's default ('std') tabular layout; the
# remaining ones must have been requested explicitly when running BLAST.
# Columns missing from a row are left as nil. Values are handed over as the
# Strings read from the row; no numeric conversion happens here.
#
# Returns +lines+.
# Raises RuntimeError when the first line is a comment but the last line
# does not contain the "# BLAST processed N queries" footer.
def parse(lines)
  # An empty report (e.g. an empty file) has nothing to parse; without this
  # guard lines.first would be nil.
  return lines if lines.empty?

  # A commented report is only complete when BLAST wrote its closing footer.
  if lines.first.start_with?('#') && lines.last !~ /# BLAST processed (\d+) queries/
    raise "Blast didn't processed your queries"
  end

  query_name = ''

  lines.each do |raw_line|
    # Non-destructive chomp: leaves the caller's strings untouched.
    line = raw_line.chomp

    # A blank row carries no fields; skip it instead of building an
    # all-nil hit attached to a query named nil.
    next if line.strip.empty?

    if line =~ /^\s*#/
      if line =~ /^#\sQuery:\s+(.+)$/
        query_name = Regexp.last_match(1)
      elsif line =~ /^#\s0\shits\sfound$/
        # Queries without hits still get an (empty) entry.
        @querys.push BlastQuery.new(query_name)
      end
      next
    end

    # NOTE(review): /\t+/ collapses consecutive tabs, so an empty field in
    # the middle of a row shifts every following column to the left.
    qseqid, sacc, pident, length, mismatch, gapopen,
      qstart, qend, sstart, s_end, evalue, bitscore,
      score, qframe, sframe, qseq, sseq, qlen, slen, stitle = line.split(/\t+/)

    hit = BlastHit.new(qstart, qend, sstart, s_end)

    hit.align_len  = length
    hit.ident      = pident
    # NOTE(review): gaps receives the gapopen column (number of gap
    # openings), not a total-gaps column.
    hit.gaps       = gapopen
    hit.mismatches = mismatch
    hit.e_val      = evalue
    hit.bit_score  = bitscore

    hit.score   = score
    hit.q_frame = qframe
    hit.s_frame = sframe

    hit.subject_id          = sacc
    hit.full_subject_length = slen
    hit.definition          = stitle
    hit.acc                 = sacc
    hit.q_seq               = qseq
    hit.s_seq               = sseq
    hit.q_len               = qlen
    hit.s_len               = slen

    # Attach the hit to the query already registered under this id, or
    # register a new query for it first.
    query = find_query(@querys, qseqid)
    unless query
      query = BlastQuery.new(qseqid)
      @querys.push query
    end
    query.add_hit(hit)

    query.full_query_length = qlen
  end
end