class GFF3

Constants

CDS_feature

Public Class Methods

new(file: "", is_gz: true) click to toggle source
# File lib/bio-pangenome/gff3_extensions.rb, line 12
# Creates a reader bound to one GFF3 file. Nothing is opened here; the
# file is only opened when #each is iterated.
#
# @param file [String] path passed to the file-opening calls in #each
# @param is_gz [Boolean] when true, #each reads the file through Zlib::GzipReader
def initialize(file: "", is_gz: true)
        @file, @is_gz = file, is_gz
end

Public Instance Methods

bedAroundGene(distance:1000, out:$stdout) click to toggle source
# File lib/bio-pangenome/gff3_extensions.rb, line 90
# Writes one tab-separated, six-column line per gene record, describing the
# gene extended by +distance+ on both sides.
#
# Columns: seqid, window start, window end, "<id>_<source>_<distance>bp",
# a literal ".", and the strand.
#
# @param distance [Integer] amount subtracted from the gene start and added to the gene end
# @param out [#puts] destination for the generated lines
def bedAroundGene(distance:1000, out:$stdout)
        each_gene do |gene|
                # The window start is never allowed to drop below 1.
                window_start = [gene.start - distance, 1].max
                window_end   = gene.end + distance
                label        = "#{gene.id}_#{gene.source}_#{distance}bp"
                columns = [gene.seqid, window_start, window_end, label, ".", gene.strand]
                out.puts columns.join("\t")
        end
end
calculate_mrna_stats() click to toggle source
# File lib/bio-pangenome/gff3_extensions.rb, line 66
# Populates @mrna_stats, a Hash keyed by the "Parent" attribute of each CDS
# record. Each value is a MrnaStats holding the number of CDS records seen
# for that parent and the largest gap between consecutive CDS records of
# the same parent.
#
# The work is done once: later calls return immediately because
# @mrna_stats is already set. A gap is only measured between two CDS
# records that are adjacent in iteration order and share a parent.
#
# @return [nil]
def calculate_mrna_stats
        return if @mrna_stats
        # Unknown keys are created on first access with both counters at zero.
        @mrna_stats = Hash.new { |stats, mrna_id| stats[mrna_id] = MrnaStats.new(0, 0) }
        previous_parent = ""
        previous_cds = nil
        each_cds do |cds|
                parent = cds.get_attribute("Parent")
                entry = @mrna_stats[parent]
                entry.cds_count += 1
                if previous_parent == parent
                        # Gap between the end of the previous CDS and the start of this one.
                        gap = cds.start - previous_cds.end
                        entry.cds_max_gap = [entry.cds_max_gap, gap].max
                end
                previous_parent, previous_cds = parent, cds
        end
        nil
end
cds_to_print(mrna, cannonical_exons: [], colors: [...]) click to toggle source
# File lib/bio-pangenome/gff3_extensions.rb, line 100
# Builds one CDS_feature per CDS record, in iteration order.
#
# Each feature is constructed from, in this order: the second and third
# whitespace-separated tokens of the record's "Target" attribute (as
# integers), a colour taken from +colors+ in rotation, the record's seqid,
# start and end, and the record's start relative to the start of the first
# CDS seen.
#
# NOTE(review): +mrna+ and +cannonical_exons+ are accepted but never read
# here, so every CDS yielded by #each_cds is included - confirm whether
# filtering by mRNA was intended.
#
# @param mrna unused by the current implementation
# @param cannonical_exons [Array] unused by the current implementation
# @param colors [Array<String>] colours assigned to consecutive CDS records, cycling when exhausted
# @return [Array<CDS_feature>]
def cds_to_print(mrna,cannonical_exons:[], colors:["#a6cee3", "#1f78b4", "#b2df8a" , "#33a02c", "#fb9a99",  "#e31a1c", "#fdbf6f", "#ff7f00", "#cab2d6", "#6a3d9a"])
        features = []
        # 0 doubles as the "not set yet" marker for the first CDS start.
        first_start = 0
        each_cds do |cds|
                _, target_from, target_to = cds.get_attribute("Target").split(" ")
                # features.size is the zero-based index of the record being processed.
                colour = colors[features.size % colors.size]
                first_start = cds.start if first_start == 0
                features << CDS_feature.new(
                        target_from.to_i, target_to.to_i, colour,
                        cds.seqid, cds.start, cds.end, cds.start - first_start
                )
        end
        features
end
each() { |record| ... } click to toggle source
# File lib/bio-pangenome/gff3_extensions.rb, line 17
# Streams the file at @file line by line and yields one parsed record per
# feature line.
#
# Blank lines and lines starting with '#' are skipped, and reading stops
# at a '##FASTA' line. When @is_gz is true the file is read through
# Zlib::GzipReader, otherwise as a plain file. The handle is always closed
# when iteration finishes, is broken out of, or raises.
#
# @yieldparam record [Bio::GFFbrowser::FastLineRecord] the record built from one line
# @return [Enumerator] when called without a block
# @raise [StandardError] whatever the line parser raised, re-raised
#   unchanged after the offending line has been reported on $stderr
def each
        return enum_for(:each) unless block_given?
        # GzipReader.open owns the underlying file, so closing the reader releases both.
        io = @is_gz ? Zlib::GzipReader.open(@file) : File.open(@file)
        parser = Bio::GFFbrowser::FastLineParser
        begin
                io.each_line do |line|
                        line.encode!('UTF-8', 'UTF-8', :invalid => :replace)
                        line.strip!
                        break if line == '##FASTA'
                        next if line.empty? || line.start_with?('#')
                        begin
                                record = Bio::GFFbrowser::FastLineRecord.new(parser.parse_line_fast(line))
                        rescue StandardError => e
                                $stderr.puts "Unable to parse '#{line}'\n#{e}"
                                # Re-raise the original error; `throw` would have
                                # replaced it with an UncaughtThrowError.
                                raise
                        end
                        # Yield outside the rescue so errors raised by the caller's
                        # block are not misreported as parse failures.
                        yield record
                end
        ensure
                io.close
        end
end
each_cds() { |record| ... } click to toggle source
# File lib/bio-pangenome/gff3_extensions.rb, line 58
# Yields every record from #each whose feature is "CDS".
#
# @yieldparam record the CDS record
# @return [Enumerator] over the CDS records when called without a block
def each_cds
        # The enumerator must re-enter this method; it previously pointed at
        # :each_mrna and so enumerated mRNA records instead.
        return enum_for(:each_cds) unless block_given?
        each do |record|
                next unless record.feature == "CDS"
                yield record
        end
end
each_gene() { |record| ... } click to toggle source
# File lib/bio-pangenome/gff3_extensions.rb, line 42
# Yields every record from #each whose feature is "gene".
#
# @yieldparam entry the gene record
# @return [Enumerator] over the gene records when called without a block
def each_gene
        return to_enum(:each_gene) unless block_given?
        each { |entry| yield entry if entry.feature == "gene" }
end
each_mrna() { |record| ... } click to toggle source
# File lib/bio-pangenome/gff3_extensions.rb, line 50
# Yields every record from #each whose feature is "mRNA".
#
# @yieldparam entry the mRNA record
# @return [Enumerator] over the mRNA records when called without a block
def each_mrna
        return to_enum(:each_mrna) unless block_given?
        each { |entry| yield entry if entry.feature == "mRNA" }
end
mrna_info(id) click to toggle source
# File lib/bio-pangenome/gff3_extensions.rb, line 85
# Looks up the CDS statistics recorded for one mRNA, computing the
# statistics table on first use via #calculate_mrna_stats.
#
# The lookup goes through @mrna_stats' default block, so an id that was
# never seen gets a fresh zeroed entry that is stored in the table.
#
# @param id the key (a CDS "Parent" attribute value) to look up
# @return the stats entry stored under +id+
def mrna_info(id)
        calculate_mrna_stats
        stats_by_mrna = @mrna_stats
        stats_by_mrna[id]
end