class JgiGenesGff

Public Class Methods

new(path) click to toggle source
# File lib/jgi_genes.rb, line 13
# Open the JGI GFF file at +path+ for reading and buffer its first record.
#
# path:: location of the file to parse.
#
# The handle is kept in @jgi_file so later read_record calls can continue
# from the current position; the buffered record in @next_gff is the
# starting point for the first next_gene call.
def initialize(path)
  @jgi_file = File.new(path, "r")
  @next_gff = read_record
end

Public Instance Methods

distance_iterator() click to toggle source
# File lib/jgi_genes.rb, line 95
# Build a JgiGenesIterator that wraps this object.
#
# Returns the newly created JgiGenesIterator.
def distance_iterator
  JgiGenesIterator.new(self)
end
next_gene() click to toggle source

Return an enhanced_gene object, or nil if none exists

# File lib/jgi_genes.rb, line 19
# Assemble the next gene from consecutive GFF records.
#
# Starts from the record buffered in @next_gff, then keeps pulling records
# with read_record for as long as they carry the same gene name. The first
# record that belongs to a different gene (or nil once the input runs out)
# is stored back into @next_gff for the following call.
#
# Returns a PositionedGene, or nil when there is no buffered record left.
#
# Raises Exception when the buffered record is not an 'exon', when a record
# of the same gene disagrees on strand, seqname or source, or when a record
# has a feature type other than exon/CDS/start_codon/stop_codon. In those
# cases @next_gff is left untouched.
def next_gene
  head = @next_gff
  return nil unless head

  # Every gene is assumed to open with an exon record.
  unless head.feature === 'exon'
    p head.feature
    # Unclear whether continuing would be harmful, so fail loudly.
    raise Exception, "Assumption failed: exon is not first feature in the gene"
  end

  seqname = head.seqname
  strand = head.strand
  source = head.source
  gene_name = parse_name(head.attributes)

  # Wraps a record's start/end coordinates in a Bio::Location.
  span_of = lambda do |record|
    span = Bio::Location.new
    span.from = record.start
    span.to = record.end
    span
  end

  exons = [span_of.call(head)]
  cds = []
  protein_id = nil # stays nil until a CDS record provides it

  # Consume records until the name changes or the input is exhausted.
  upcoming = nil
  while (upcoming = read_record)
    # A different name marks the first record of the following gene.
    break unless parse_name(upcoming.attributes) == gene_name

    if upcoming.strand != strand || upcoming.seqname != seqname || upcoming.source != source
      puts "EXCEPTION !!!!!!!!!!!!!!!!!!!"
      raise Exception, 'Data bug in JGI file or parsing is being done incorrectly'
    end

    span = span_of.call(upcoming)
    case upcoming.feature
    when 'exon'
      exons << span
    when 'CDS'
      cds << span
      protein_id = parse_protein_id(upcoming.attributes)
    when 'start_codon', 'stop_codon'
      # Codon markers are recognised but not recorded.
    else
      puts "EXCEPTION !!!!!!!!!!!!!!!!!!!"
      raise Exception, "Unknown feature type #{upcoming.feature} found."
    end
  end

  # Hand the look-ahead record (nil at end of input) to the next call.
  @next_gff = upcoming

  # Package everything collected for this gene.
  gene = PositionedGene.new
  gene.seqname = seqname
  gene.name = gene_name
  gene.strand = strand
  gene.start = exons.first.from
  gene.exons = exons
  gene.cds = cds
  gene.protein_id = protein_id
  gene
end

Private Instance Methods

parse_name(attributes) click to toggle source

Return the name of the gene, given the attributes hash

# File lib/jgi_genes.rb, line 119
# Extract the gene name from a record's attributes.
#
# attributes:: hash-like object; the value under 'name' is read.
#
# Returns that value with every double-quote character removed.
def parse_name(attributes)
  attributes['name'].delete('"')
end
parse_protein_id(attributes) click to toggle source
# File lib/jgi_genes.rb, line 125
# Extract the protein id from a record's attributes.
#
# attributes:: hash-like object; the value under 'proteinId' is read.
#
# Returns the result of calling to_i on that value.
def parse_protein_id(attributes)
  raw_id = attributes['proteinId']
  raw_id.to_i
end
read_record() click to toggle source

Read the next non-blank line from the file and create a GFF record object from it; return nil if the end of the file is reached first

# File lib/jgi_genes.rb, line 102
# Fetch the next record from @jgi_file.
#
# Lines that are empty or contain only whitespace are skipped.
#
# Returns a JgiGffRecord built from the first non-blank line, or nil when
# the input ends before such a line is found.
def read_record
  loop do
    raw = @jgi_file.gets
    return nil unless raw
    return JgiGffRecord.new(raw) unless raw.strip.empty?
  end
end