class BioVcf::VcfHeader

Attributes

field[R]
lines[R]

Public Class Methods

new(debug = false) click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 37
# Build an empty header: no lines, no cached single-value fields, no
# cached meta hash and an empty sample-subset cache.
#
# debug - kept in @debug for later use by the field parser.
def initialize(debug = false)
  @cached_filter_index = {}
  @meta = nil
  @field = {}
  @lines = []
  @debug = debug
end

Public Instance Methods

add(line) click to toggle source

Add a new field to the header

# File lib/bio-vcf/vcfheader.rb, line 46
# Append header text. The text is split on newlines and every piece is
# stored as its own entry; a fresh array is assigned to @lines and
# returned.
def add(line)
  extra = line.split(/\n/)
  @lines = @lines + extra
end
column_names() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 64
# Column names as extracted from the header lines by
# VcfHeaderParser.get_column_names; the result is memoized.
def column_names
  return @column_names if @column_names
  @column_names = VcfHeaderParser.get_column_names(@lines)
end
columns() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 68
# Number of columns named in the header (memoized in @column).
def columns
  return @column if @column
  @column = column_names.size
end
contig() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 176
# All structured '##contig=<...>' header entries, as returned by
# find_fields.
def contig
  find_fields 'contig'
end
filter() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 172
# All structured '##FILTER=<...>' header entries, as returned by
# find_fields.
def filter
  find_fields 'FILTER'
end
find_field(name) click to toggle source

Look for a line in the header with the field name and return the value, otherwise return nil

# File lib/bio-vcf/vcfheader.rb, line 136
# Look for a header line of the form "##name=value" and return the value
# of the first match, or nil when no line matches. Hits are stored in
# the field hash and served from there on later calls.
#
# The name is escaped before it is placed in the pattern, so characters
# such as '.' or '[' are matched literally, and the pattern is anchored
# to the start of the line so a "##name=" that merely appears inside
# another line's value is not picked up.
def find_field name
  cached = field[name]
  return cached if cached
  pattern = Regexp.new('\A##' + Regexp.escape(name.to_s) + '=(.*)')
  @lines.each do | line |
    found = pattern.match(line)
    next unless found
    v = found[1]
    field[name] = v
    return v
  end
  nil
end
find_fields(name) click to toggle source

Look for all the lines that match the field name and return a hash of hashes. An empty hash is returned when there are no matches.

# File lib/bio-vcf/vcfheader.rb, line 152
# Collect every header line of the form "##name=<...>" and return a hash
# that maps the 'ID' entry of each parsed line to the parsed line (as
# produced by VcfHeaderParser.parse_field). An empty hash is returned
# when nothing matches.
#
# The name is escaped and the pattern anchored to the start of the line,
# so only lines that really begin with "##name=<" are considered.
def find_fields name
  pattern = Regexp.new('\A##' + Regexp.escape(name.to_s) + '=<.*>')
  res = {}
  @lines.each do | line |
    next unless line =~ pattern
    v = VcfHeaderParser.parse_field(line, @debug)
    res[v['ID']] = v
  end
  res
end
format() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 168
# All structured '##FORMAT=<...>' header entries, as returned by
# find_fields.
def format
  find_fields 'FORMAT'
end
gatkcommandline() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 184
# All structured '##GATKCommandLine=<...>' header entries, as returned
# by find_fields.
def gatkcommandline
  find_fields 'GATKCommandLine'
end
info() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 180
# All structured '##INFO=<...>' header entries, as returned by
# find_fields.
def info
  find_fields 'INFO'
end
meta() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 188
# Build (once) a hash describing the whole header. Plain "##key=value"
# lines become string entries; the INFO, FORMAT, FILTER, contig and
# GATKCommandLine keys hold the hashes returned by the corresponding
# accessor methods. The result is cached in @meta.
def meta
  return @meta if @meta
  structured = %w[INFO FORMAT FILTER contig GATKCommandLine]
  res = {}
  # Seed the structured keys first so they lead the hash.
  structured.each { |key| res[key] = {} }
  @lines.each do |line|
    pair = line.scan(/##(.*?)=(.*)/).first
    next unless pair
    k, v = pair
    res[k] = v unless structured.include?(k)
  end
  res['INFO'] = info()
  res['FORMAT'] = format()
  res['FILTER'] = filter()
  res['contig'] = contig()
  res['GATKCommandLine'] = gatkcommandline()
  @meta = res # cache values
end
method_missing(m, *args, &block) click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 211
# Treat an unknown method name as a header query: header.fileformat
# returns the value of the "##fileformat=..." line. Raises a
# RuntimeError when no header line carries that name.
def method_missing(m, *args, &block)
  name = m.to_s
  value = find_field(name)
  return value if value
  raise "Unknown VCF header query '#{name}'"
end

# Companion to method_missing: report a name as supported only when a
# matching header line exists. Without this, respond_to? is wrong for
# header keys, and Ruby's implicit conversion probes (to_ary, to_str,
# ...) end up in method_missing and raise.
def respond_to_missing?(m, include_private = false)
  name = m.to_s
  # Only identifier-like names are looked up; operator names such as []
  # cannot be header keys reached through a normal call.
  return true if name =~ /\A\w+\z/ && find_field(name)
  super
end
num_samples() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 94
# Number of samples named in the header, or 0 when samples is nil
# (memoized).
def num_samples
  return @num_samples if @num_samples
  names = samples
  @num_samples = names.nil? ? 0 : names.size
end
printable_header_line(fields) click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 72
# Join the given column names with tabs, replacing the placeholder
# '#samples' with the sample names from the header.
def printable_header_line(fields)
  expanded = fields.map do |name|
    name == '#samples' ? samples : name
  end
  expanded.join("\t")
end
sample_index() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 98
# Map each sample name, and its lower-cased form, to position + 9,
# where position is the sample's 0-based place in samples. The hash is
# built once and cached.
def sample_index
  @sample_index ||= begin
    lookup = {}
    samples.each_with_index do |name, pos|
      column = pos + 9
      lookup[name] = column
      lookup[name.downcase] = column
    end
    lookup
  end
end
sample_subset_index(list) click to toggle source

Give a list of samples (by index and/or name) and return 0-based index values. The cache has to be able to handle multiple lists - that is why it is a hash.

# File lib/bio-vcf/vcfheader.rb, line 108
# Translate a list of samples, each given as an index or as a name,
# into 0-based sample indices. A nil list stands for all samples.
# Results are cached per requested list in @cached_filter_index.
#
# Raises a RuntimeError for a name that is not among the samples.
def sample_subset_index list
  cached = @cached_filter_index[list]
  return cached if cached
  # Keep `list` untouched: it is the cache key. Storing under the
  # expanded list would mean a nil request never hits the cache.
  wanted = list || samples_index_array
  indices = wanted.map do |item|
    begin
      Integer(item)
    rescue StandardError
      # Not a number, so treat it as a sample name.
      idx = samples.index(item)
      raise "Unknown sample name '#{item}'" if idx.nil?
      idx
    end
  end
  @cached_filter_index[list] = indices
end
samples() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 82
# Sample names: the column names from index 9 onwards when the header
# has more than 8 columns, otherwise an empty list (memoized).
def samples
  return @samples if @samples
  names = column_names
  @samples = names.size > 8 ? names[9..-1] : []
end
samples_index_array() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 90
# 0-based indices [0, 1, ...], one per column from index 9 onwards
# (memoized). Returns an empty list when the header has fewer than nine
# columns, instead of failing on the nil slice.
def samples_index_array
  @all_samples_index ||= (column_names[9..-1] || []).each_index.to_a
end
tag(h) click to toggle source

Push a special key value list to the header

# File lib/bio-vcf/vcfheader.rb, line 51
# Record a key/value list in the header as a '##BioVcf=<Key="value",...>'
# line and return that line. The keys :show_help, :skip_header,
# :verbose, :quiet and :debug are left out; the hash passed in is not
# modified.
#
# The line is placed just before the last header line. When the header
# is still empty it is simply appended, since insert(-2, ...) raises
# IndexError on an empty array.
def tag h
  h2 = h.dup
  [:show_help,:skip_header,:verbose,:quiet,:debug].each { |key| h2.delete(key) }
  pairs = h2.map { |k,v| k.to_s.capitalize+'='+'"'+v.to_s+'"' }.join(',')
  line = '##BioVcf=<'+pairs+'>'
  if @lines.empty?
    @lines << line
  else
    @lines.insert(-2,line)
  end
  line
end
version() click to toggle source
# File lib/bio-vcf/vcfheader.rb, line 60
# VCF version string (for example "4.2") read from a
# "##fileformat=VCFv<major>.<minor>" first header line. Returns nil
# when the header is empty or its first line carries no such
# declaration, rather than failing with NoMethodError.
def version
  @version ||= lines.first.to_s[/##fileformat=VCFv(\d+\.\d+)/, 1]
end