class Bio::GFF::GFF3
DESCRIPTION
Represents version 3 of the GFF specification. For more information on GFF3, see song.sourceforge.net/gff3.shtml
Constants
Attributes
GFF3 version string (String or nil). nil means “3”.
Metadata (except “##sequence-region”, “##gff-version”, “###”). Must be an array of Bio::GFF::GFF3::MetaData objects.
Metadata of “##sequence-region”. Must be an array of Bio::GFF::GFF3::SequenceRegion objects.
Sequences bundled within GFF3. Must be an array of Bio::Sequence objects.
Public Class Methods
Creates a Bio::GFF::GFF3 object by building a collection of Bio::GFF::GFF3::Record (and metadata) objects.
Arguments:
- str: string in GFF format
Returns:
- Bio::GFF::GFF3 object
# File lib/bio/db/gff.rb, line 877
#
# Sets up an empty GFF3 container and, when +str+ is given, parses it
# right away.
#
# Arguments:
# * str: string in GFF format; nil (the default) skips parsing
def initialize(str = nil)
  # Parser state: stays false until parsing has switched to FASTA data.
  @in_fasta = false
  # nil until a "##gff-version" directive has been parsed.
  @gff_version = nil
  # Each collection gets its own, independent empty array.
  @records, @sequence_regions, @metadata, @sequences = [], [], [], []
  parse(str) if str
end
Public Instance Methods
Parses GFF3 entries and appends the parsed data to this object.
Note that once a “##FASTA” line has been given, only FASTA-formatted text is accepted.
Arguments:
- str: string in GFF format
Returns:
- self
# File lib/bio/db/gff.rb, line 911
#
# Parses GFF3 text and appends the parsed data to this object.
# Once FASTA mode has been entered (@in_fasta is true), every later call
# hands its whole argument to parse_fasta.
#
# Arguments:
# * str: a String in GFF format, or an IO-like object (anything that
#   responds to :gets)
# Returns:: self
def parse(str)
  # Already past the FASTA marker: everything is sequence data.
  if @in_fasta
    parse_fasta(str)
    return self
  end

  if str.respond_to?(:gets)
    # IO-like object: there is no separate FASTA part to split off up front.
    fst = nil
  else
    # String: cut at the first line that begins with ">" or "##FASTA".
    gff, sep, fst = str.split(/^(\>|##FASTA.*)/n, 2)
    # A bare ">" separator belongs to the first FASTA header, so put it back.
    fst = sep + fst if sep == '>' && fst
    # String#split returns [] for an empty string, which leaves gff nil;
    # fall back to '' so that the each_line call below does not raise.
    str = gff || ''
  end

  # Parse the GFF lines.
  str.each_line do |line|
    if /^\#\#([^\s]+)/ =~ line
      parse_metadata($1, line)
      # parse_metadata turns @in_fasta on for "##FASTA"; the same str is
      # then handed to parse_fasta.
      parse_fasta(str) if @in_fasta
    elsif /^\>/ =~ line
      # A ">" line starts FASTA data; the header line already read is
      # passed along so that parse_fasta can prepend it.
      @in_fasta = true
      parse_fasta(str, line)
    else
      @records << GFF3::Record.new(line)
    end
  end

  # Parse the FASTA part that was split off from a String argument.
  if fst
    @in_fasta = true
    parse_fasta(fst)
  end

  self
end
Returns the string representation of the whole entry.
# File lib/bio/db/gff.rb, line 966
#
# Builds the string representation of the whole entry: the
# "##gff-version" line, then metadata, sequence regions and records,
# followed by a "##FASTA" section when any sequences are present.
#
# Returns:: String
def to_s
  version = @gff_version || VERSION.to_s

  # The FASTA section is emitted only when there is at least one sequence.
  fasta_section =
    if @sequences.size > 0
      entries = @sequences.map { |seq| seq.to_fasta(seq.entry_id, 70) }
      "##FASTA\n" + entries.join('')
    else
      ''
    end

  lines = ["##gff-version #{escape(version)}\n"]
  lines += @metadata.map { |meta| meta.to_s }
  lines += @sequence_regions.map { |region| region.to_s }
  lines += @records.map { |record| record.to_s }

  lines.join('') + fasta_section
end
Private Instance Methods
Parses FASTA-formatted data.
# File lib/bio/db/gff.rb, line 951
#
# Parses FASTA-formatted data and appends each sequence to @sequences.
#
# Arguments:
# * str: object responding to each_line (String or IO-like) holding the
#   FASTA text
# * line: (optional) text already consumed by the caller; it is prepended
#   to the first chunk read from +str+
def parse_fasta(str, line = nil)
  # Using "\n>" as the record separator yields one chunk per FASTA entry.
  str.each_line("\n>") do |chunk|
    if line
      chunk = line + chunk
      line = nil # only the first chunk gets the pending text
    end

    stripped = chunk.strip
    # Skip chunks that hold nothing but whitespace or a lone ">".
    next if stripped.empty? || stripped == '>'

    fasta = Bio::FastaFormat.new(chunk)
    sequence = fasta.to_seq
    # The entry ID is the first whitespace-delimited word of the definition.
    first_word = fasta.definition.strip.split(/\s/, 2)[0]
    sequence.entry_id = unescape(first_word.to_s)
    @sequences.push(sequence)
  end
end
Parses a metadata (“##” directive) line.
# File lib/bio/db/gff.rb, line 1838
#
# Handles one "##" directive line.
#
# Arguments:
# * directive: the directive name, i.e. the text following "##"
# * line: the complete directive line
# Returns:: true
def parse_metadata(directive, line)
  case directive
  when 'gff-version'
    # Keep the first version seen; later directives do not override it.
    @gff_version ||= line.split(/\s+/)[1]
  when 'FASTA' then @in_fasta = true
  when 'sequence-region' then @sequence_regions.push(SequenceRegion.parse(line))
  when '#' then @records.push(RecordBoundary.new) # "###" directive
  else @metadata.push(MetaData.parse(line))
  end
  true
end