class JgiGenesGff
Public Class Methods
new(path)
click to toggle source
# File lib/jgi_genes.rb, line 13
# Open the file at +path+ for reading and read the first record ahead, so
# that it is stored as the pending record (@next_gff) for next_gene.
# The file handle is kept in @jgi_file for later reads.
def initialize(path)
  @jgi_file = File.open(path, "r")
  @next_gff = read_record
end
Public Instance Methods
distance_iterator()
click to toggle source
# File lib/jgi_genes.rb, line 95
# Build and return a new JgiGenesIterator constructed around this object.
def distance_iterator
  JgiGenesIterator.new(self)
end
next_gene()
click to toggle source
Return an enhanced gene object (a PositionedGene), or nil if none exists
# File lib/jgi_genes.rb, line 19
# Return the next gene as a PositionedGene, or nil when no pending record
# is left.
#
# The record that opens the gene is the one read ahead by the previous call
# (or by the constructor) and stored in @next_gff. Following records are
# consumed while they carry the same name; the first record with a different
# name is stored in @next_gff for the next call.
#
# Raises RuntimeError when:
# * the pending record is not an 'exon' (a gene is assumed to start with one),
# * a record of the same gene disagrees on strand, seqname or source,
# * a record has a feature other than exon, CDS, start_codon or stop_codon.
#
# When an error is raised, @next_gff is left untouched.
def next_gene
  first = @next_gff
  return nil unless first

  # Make sure the assumption that the first record is an exon holds. The
  # offending feature is reported in the message rather than printed.
  unless first.feature == 'exon'
    raise RuntimeError,
          "Assumption failed: exon is not first feature in the gene (found #{first.feature.inspect})"
  end

  seqname = first.seqname
  strand = first.strand
  source = first.source
  name = parse_name(first.attributes)

  opening = Bio::Location.new
  opening.from = first.start
  opening.to = first.end

  exons = [opening]
  cds = []
  protein_id = nil # Unknown until a CDS line is seen

  # Keep reading until the gene changes or the file runs out. On exit,
  # +record+ is either the first record of the next gene or nil.
  record = nil
  while (record = read_record)
    break unless parse_name(record.attributes) == name

    if record.strand != strand || record.seqname != seqname || record.source != source
      raise RuntimeError, 'Data bug in JGI file or parsing is being done incorrectly'
    end

    location = Bio::Location.new
    location.from = record.start
    location.to = record.end

    case record.feature
    when 'exon'
      exons.push(location)
    when 'CDS'
      cds.push(location)
      protein_id = parse_protein_id(record.attributes)
    when 'start_codon', 'stop_codon'
      # Codon markers are deliberately ignored.
    else
      raise RuntimeError, "Unknown feature type #{record.feature} found."
    end
  end

  # Make ready for the next gene.
  @next_gff = record

  gene = PositionedGene.new
  gene.seqname = seqname
  gene.name = name
  gene.strand = strand
  gene.start = exons[0].from
  gene.exons = exons
  gene.cds = cds
  gene.protein_id = protein_id
  gene
end
Private Instance Methods
parse_name(attributes)
click to toggle source
Return the name of the gene, given the attributes hash
# File lib/jgi_genes.rb, line 119
# Return the gene name stored under the 'name' key of +attributes+, with
# every double-quote character removed. The stored value itself is not
# modified; a new string is returned.
def parse_name(attributes)
  # String#delete removes the fixed character directly, so no pattern
  # substitution is needed.
  attributes['name'].delete('"')
end
parse_protein_id(attributes)
click to toggle source
# File lib/jgi_genes.rb, line 125
# Return the value stored under the 'proteinId' key of +attributes+,
# converted with to_i.
def parse_protein_id(attributes)
  attributes['proteinId'].to_i
end
read_record()
click to toggle source
Read the next non-blank line from the file and create a GFF record object from it; return nil if no such line remains
# File lib/jgi_genes.rb, line 102
# Read lines from @jgi_file until one is found that is not empty after
# stripping surrounding whitespace, and return a JgiGffRecord built from
# that line. Returns nil once the file has no more lines.
def read_record
  loop do
    raw = @jgi_file.gets
    return nil unless raw

    # Blank and whitespace-only lines are skipped.
    next if raw.strip.empty?

    return JgiGffRecord.new(raw)
  end
end