class BioVcf::VcfHeader
Attributes
field[R]
lines[R]
Public Class Methods
new(debug = false)
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 37
# Set up an empty header.
#
# debug - flag kept in @debug; find_fields forwards it to the header parser.
def initialize(debug = false)
  # Caches start out empty / unset.
  @cached_filter_index = {}
  @meta = nil
  # Lookup table filled lazily by find_field, and the raw header lines.
  @field = {}
  @lines = []
  @debug = debug
end
Public Instance Methods
add(line)
click to toggle source
Add one or more lines to the header (the input is split on newlines)
# File lib/bio-vcf/vcfheader.rb, line 46
# Append header text to the stored lines.
#
# line - String; it is split on "\n", so several lines may be passed at once.
#
# Returns the new array of header lines (a fresh array is assigned to
# @lines rather than mutating the previous one).
def add(line)
  incoming = line.split("\n")
  @lines = @lines + incoming
end
column_names()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 64
# Column names as extracted from the header lines by
# VcfHeaderParser.get_column_names. The result is memoized in @column_names.
def column_names
  return @column_names if @column_names

  @column_names = VcfHeaderParser.get_column_names(@lines)
end
columns()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 68
# Number of entries in column_names, memoized in @column.
def columns
  return @column if @column

  @column = column_names.size
end
contig()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 176
# All "##contig=<...>" header entries, as returned by find_fields.
def contig
  header_key = 'contig'
  find_fields(header_key)
end
filter()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 172
# All "##FILTER=<...>" header entries, as returned by find_fields.
def filter
  header_key = 'FILTER'
  find_fields(header_key)
end
find_field(name)
click to toggle source
Look for a line in the header with the field name and return the value, otherwise return nil
# File lib/bio-vcf/vcfheader.rb, line 136
# Look up a simple "##name=value" header line.
#
# name - header key; it is matched literally (regex metacharacters in the
#        name are escaped, so "a.b" no longer matches "##axb=").
#
# Returns the text after "=" on the first matching line, or nil when no line
# matches. A found value is stored in +field+ so later lookups skip the scan.
def find_field(name)
  cached = field[name]
  return cached if cached

  pattern = Regexp.new('##' + Regexp.escape(name.to_s) + '=(.*)')
  @lines.each do |line|
    found = line.match(pattern)
    next unless found

    return field[name] = found[1]
  end
  nil
end
find_fields(name)
click to toggle source
Look for all the lines that match the field name and return a hash of hashes. An empty hash is returned when there are no matches.
# File lib/bio-vcf/vcfheader.rb, line 152
# Collect every structured "##name=<...>" header line.
#
# name - header key; matched literally (regex metacharacters are escaped).
#
# Each matching line is handed to VcfHeaderParser.parse_field together with
# @debug, and the parsed result is stored under its 'ID' entry. When two
# lines share an ID the later one wins.
#
# Returns a Hash of ID => parsed entry; empty when nothing matches.
def find_fields(name)
  pattern = Regexp.new('##' + Regexp.escape(name.to_s) + '=<(.*)>')
  @lines.each_with_object({}) do |line, res|
    next unless line =~ pattern

    parsed = VcfHeaderParser.parse_field(line, @debug)
    res[parsed['ID']] = parsed
  end
end
format()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 168
# All "##FORMAT=<...>" header entries, as returned by find_fields.
def format
  header_key = 'FORMAT'
  find_fields(header_key)
end
gatkcommandline()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 184
# All "##GATKCommandLine=<...>" header entries, as returned by find_fields.
def gatkcommandline
  header_key = 'GATKCommandLine'
  find_fields(header_key)
end
info()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 180
# All "##INFO=<...>" header entries, as returned by find_fields.
def info
  header_key = 'INFO'
  find_fields(header_key)
end
meta()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 188
# Build (once) a Hash describing the whole header.
#
# Simple "##key=value" lines are stored as key => raw value string; a
# repeated key keeps the last value seen. The structured keys INFO, FORMAT,
# FILTER, contig and GATKCommandLine are filled from their dedicated
# accessors instead. The result is cached in @meta.
def meta
  return @meta if @meta

  structured = %w[INFO FORMAT FILTER contig GATKCommandLine]
  # Seed the structured keys first so they keep the leading hash positions.
  collected = {}
  structured.each { |key| collected[key] = {} }

  @lines.each do |line|
    pair = line.match(/##(.*?)=(.*)/)
    next unless pair

    key, raw = pair.captures
    collected[key] = raw unless structured.include?(key)
  end

  collected['INFO'] = info()
  collected['FORMAT'] = format()
  collected['FILTER'] = filter()
  collected['contig'] = contig()
  collected['GATKCommandLine'] = gatkcommandline()
  @meta = collected # cache values
end
method_missing(m, *args, &block)
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 211
# Treat an unknown method call as a header lookup: the method name is passed
# to find_field, so e.g. calling +fileformat+ returns the value of the
# "##fileformat=" line. Arguments and block are ignored.
#
# Raises RuntimeError when no header line matches the method name.
def method_missing(m, *args, &block)
  name = m.to_s
  value = find_field(name)
  return value if value

  raise "Unknown VCF header query '#{name}'"
end

# Keep respond_to? consistent with method_missing: a name is answered when
# find_field can resolve it, otherwise the default lookup applies.
def respond_to_missing?(m, include_private = false)
  find_field(m.to_s) ? true : super
end
num_samples()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 94
# Number of sample names, or 0 when +samples+ is nil. Memoized in
# @num_samples.
def num_samples
  return @num_samples if @num_samples

  names = samples
  @num_samples = names.nil? ? 0 : names.size
end
printable_header_line(fields)
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 72
# Render a tab-separated header line.
#
# fields - list of column labels; the placeholder '#samples' is replaced by
#          the sample names (Array#join flattens the nested list, so each
#          sample becomes its own tab-separated entry).
#
# Returns the joined String.
def printable_header_line(fields)
  expanded = fields.map do |label|
    label == '#samples' ? samples : label
  end
  expanded.join("\t")
end
sample_index()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 98
# Map each sample name, and its lower-cased form, to a column position.
# Positions are offset by 9, matching the column at which +samples+ starts
# slicing column_names. The Hash is memoized in @sample_index.
def sample_index
  return @sample_index if @sample_index

  lookup = samples.each_with_index.each_with_object({}) do |(name, pos), acc|
    column = pos + 9
    acc[name] = column
    acc[name.downcase] = column
  end
  @sample_index = lookup
end
sample_subset_index(list)
click to toggle source
Given a list of samples (by index and/or name), return 0-based index values. The cache has to be able to handle multiple lists - that is why it is a hash.
# File lib/bio-vcf/vcfheader.rb, line 108
# Translate a list of samples into 0-based sample indices.
#
# list - Array whose items are either integers / integer-like strings (used
#        as-is after Integer conversion) or sample names (looked up in
#        +samples+). A nil list means "all samples" (samples_index_array).
#
# Results are cached per +list+ in @cached_filter_index. The cache is keyed
# by the argument exactly as the caller passed it, so a nil list is a cache
# hit on later calls as well.
#
# Returns an Array of Integer indices.
# Raises RuntimeError for an item that is neither an integer nor a known
# sample name.
def sample_subset_index(list)
  cached = @cached_filter_index[list]
  return cached if cached

  wanted = list || samples_index_array
  resolved = wanted.map do |item|
    begin
      Integer(item)
    rescue StandardError
      # Not an integer: fall back to a lookup by sample name.
      position = samples.index(item)
      raise "Unknown sample name '#{item}'" if position.nil?

      position
    end
  end
  @cached_filter_index[list] = resolved
end
samples()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 82
# Sample names: every column name from index 9 onwards, or an empty list
# when the header has 8 columns or fewer. Memoized in @samples.
def samples
  return @samples if @samples

  names = column_names
  @samples = names.size > 8 ? names[9..-1] : []
end
samples_index_array()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 90
# 0-based indices of all samples: [0, 1, ..., n-1], where n is the number of
# column names from index 9 onwards. Returns an empty Array when there are
# no such columns (including headers with fewer than 9 columns, which used
# to raise NoMethodError). Memoized in @all_samples_index.
def samples_index_array
  @all_samples_index ||= begin
    sample_count = [column_names.size - 9, 0].max
    Array.new(sample_count) { |i| i }
  end
end
tag(h)
click to toggle source
Push a special key-value list to the header
# File lib/bio-vcf/vcfheader.rb, line 51
# Record a set of options in the header as a "##BioVcf=<...>" line.
#
# h - Hash of options. The keys :show_help, :skip_header, :verbose, :quiet
#     and :debug are left out; every other pair is written as
#     Key="value" with the key capitalized. +h+ itself is not modified.
#
# The line is inserted just before the last header line. When the header is
# still empty it is simply appended (inserting at -2 would raise IndexError).
#
# Returns the generated line.
def tag(h)
  options = h.dup
  [:show_help, :skip_header, :verbose, :quiet, :debug].each { |key| options.delete(key) }
  pairs = options.map { |k, v| k.to_s.capitalize + '="' + v.to_s + '"' }
  line = '##BioVcf=<' + pairs.join(',') + '>'
  if @lines.empty?
    @lines << line
  else
    @lines.insert(-2, line)
  end
  line
end
version()
click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 60
# VCF version taken from the first header line, e.g. "4.1" for
# "##fileformat=VCFv4.1".
#
# Returns the version String, or nil when the header is empty or its first
# line is not a matching "##fileformat=VCFv<major>.<minor>" line (instead of
# failing with NoMethodError). A found version is memoized in @version.
def version
  @version ||= begin
    first_line = lines[0]
    found = first_line && first_line.match(/##fileformat=VCFv(\d+\.\d+)/)
    found && found[1]
  end
end