class Bio::MAF::Access

Top-level class for working with a set of indexed MAF files. Provides a higher-level alternative to working with {Parser} and {KyotoIndex} objects directly.

Instantiate with {Access.maf_dir} and {Access.file} methods.

Attributes

block_filter[RW]

Block filter to apply. @return [Hash] @see KyotoIndex#find

indices[R]
parse_options[RW]

Parser options. @return [Hash] @see Parser

sequence_filter[RW]

Sequence filter to apply. @return [Hash] @see Parser#sequence_filter

Public Class Methods

file(maf, index=nil, options={}) click to toggle source

Provides access to a single MAF file. If this file is not indexed, it will be fully parsed to create a temporary in-memory index. For large MAF files or ones which will be used multiple times, this is inefficient, and an index file should be created with maf_index(1).

@param [String] maf path to MAF file @param [String] index Kyoto Cabinet index file @param [Hash] options parser options @return [Access]

# File lib/bio/maf/index.rb, line 108
# Build an Access object for a single MAF file.
#
# The caller's options hash is left untouched; a copy is extended with
# the +:maf+ key and, when an index path is supplied, the +:index+ key.
#
# @param [String] maf path to MAF file
# @param [String] index Kyoto Cabinet index file (optional)
# @param [Hash] options parser options
# @return [Access]
def self.file(maf, index=nil, options={})
  settings = options.dup.tap do |copy|
    copy[:maf] = maf
    copy[:index] = index if index
  end
  new(settings)
end
maf_dir(dir, options={}) click to toggle source

Provides access to a directory of indexed MAF files. Any files with .maf suffixes and accompanying .kct indexes in the given directory will be accessible. @param [String] dir directory to scan @param [Hash] options parser options @return [Access]

# File lib/bio/maf/index.rb, line 92
# Build an Access object for a directory of indexed MAF files.
#
# The caller's options hash is left untouched; a copy carrying the
# +:dir+ key is handed to the constructor.
#
# @param [String] dir directory to scan
# @param [Hash] options parser options
# @return [Access]
def self.maf_dir(dir, options={})
  settings = options.dup.tap { |copy| copy[:dir] = dir }
  new(settings)
end
new(options) click to toggle source

@api private

# File lib/bio/maf/index.rb, line 194
# @api private
#
# Populates the per-chromosome index and MAF tables from either a
# directory of indexed MAF files (+:dir+) or a single MAF file (+:maf+,
# optionally with an explicit +:index+ path). When a MAF file is given
# and no index ends up registered, a temporary index is built by parsing
# the whole file.
#
# @param [Hash] options parser options plus +:dir+, +:maf+ and +:index+;
#   the hash is also stored as the parse options
# @raise [RuntimeError] if neither +:dir+ nor +:maf+ is given
def initialize(options)
  @parse_options = options
  @indices = {}
  @maf_by_chrom = {}
  if options[:dir]
    scan_dir(options[:dir])
  elsif options[:maf]
    index_path = options[:index]
    if index_path
      LOG.debug { "Opening index file #{index_path}" }
    else
      # No explicit index: look for a .kct file next to the MAF file.
      index_path = find_index_file(options[:maf])
    end
    if index_path
      index = KyotoIndex.open(index_path)
      begin
        register_index(index, options[:maf])
      ensure
        # Release the database handle even when register_index raises
        # (e.g. the index was created for a different MAF file).
        index.close
      end
    end
  else
    raise "Must specify :dir or :maf!"
  end
  if options[:maf] && @indices.empty?
    # MAF file explicitly given but no index
    # build a temporary one
    # (could build a real one, too...)
    maf = options[:maf]
    parser = Parser.new(maf, @parse_options)
    LOG.warn { "WARNING: building temporary index on #{maf}." }
    # The '%' path marks the index as temporary; register_index keeps
    # the index object itself for such paths.
    index = KyotoIndex.build(parser, '%')
    register_index(index, maf)
  end
end

Public Instance Methods

chrom_index(chrom) click to toggle source

@api private

# File lib/bio/maf/index.rb, line 264
# @api private
#
# Returns a usable index for the given chromosome. Entries stored as
# KyotoIndex objects (temporary indexes) are returned as-is; any other
# entry is passed to KyotoIndex.open.
#
# @param chrom key into the registered indexes
# @raise [RuntimeError] if no index is registered for +chrom+
def chrom_index(chrom)
  entry = @indices.fetch(chrom) do
    raise "No index available for chromosome #{chrom}!"
  end
  # temporary indexes are kept as live objects
  return entry if entry.is_a?(KyotoIndex)
  KyotoIndex.open(entry)
end
close() click to toggle source

Close all open resources, in particular Kyoto Cabinet database handles.

# File lib/bio/maf/index.rb, line 117
# Close all open resources, in particular Kyoto Cabinet database handles.
#
# Entries in @indices are either live index objects or plain path
# strings (register_index stores the path for indexes whose path does
# not start with '%'). Strings have nothing to close, so only entries
# that respond to #close are closed; calling #close on every value
# would raise NoMethodError for path entries.
def close
  @indices.values.each { |entry| entry.close if entry.respond_to?(:close) }
end
find(intervals, &blk) click to toggle source

Find all alignment blocks in the genomic regions in the list of Bio::GenomicInterval objects, and parse them using this object's parse options and sequence filter.

@param [Enumerable<Bio::GenomicInterval>] intervals genomic intervals to parse.

@yield [block] each {Block} matched, in turn @return [Array<Block>] each matching {Block}, if no block given @api public @see KyotoIndex#find

# File lib/bio/maf/index.rb, line 131
# Find all alignment blocks in the given genomic intervals.
#
# Intervals are grouped by chromosome; every chromosome must have a
# registered index before any searching starts. Each group is then
# searched through its own index and parser, with the block filter
# applied.
#
# @param [Enumerable<Bio::GenomicInterval>] intervals genomic intervals
#   to search
# @yield [block] each {Block} matched, in turn
# @return [Array<Block>] each matching {Block}, if no block given
# @raise [RuntimeError] if a chromosome has no registered index
# @api public
# @see KyotoIndex#find
def find(intervals, &blk)
  unless block_given?
    # No block: re-enter with a collecting block and hand back the list.
    collected = []
    self.find(intervals) { |block| collected << block }
    return collected
  end

  grouped = intervals.group_by { |interval| interval.chrom }
  # Validate everything up front so nothing is yielded before a failure.
  grouped.each_key do |chrom|
    next if @indices.has_key?(chrom)
    raise "No index available for chromosome #{chrom}!"
  end
  grouped.each do |chrom, chrom_intervals|
    with_index(chrom) do |index|
      with_parser(chrom) do |parser|
        index.find(chrom_intervals, parser, block_filter, &blk)
      end
    end
  end
end
find_index_file(maf) click to toggle source

@api private

# File lib/bio/maf/index.rb, line 231
# @api private
#
# Looks for a +.kct+ index file in the same directory as the MAF file.
# Two candidate names are tried in order: the full file name with
# +.kct+ appended, then the file name with everything from the first
# +.maf+ onward removed and +.kct+ appended.
#
# @param [String] maf path to MAF file
# @return [String, nil] the first candidate path that exists, or nil
def find_index_file(maf)
  parent = File.dirname(maf)
  full_name = File.basename(maf)
  stem = full_name.gsub(/\.maf.*/, '')
  candidates = [full_name, stem].map { |name| "#{parent}/#{name}.kct" }
  candidates.detect { |candidate| File.exist?(candidate) }
end
register_index(index, maf) click to toggle source

@api private

# File lib/bio/maf/index.rb, line 239
# @api private
#
# Records an index and its MAF file under the index's reference
# sequence. Indexes whose path starts with '%' are stored as objects;
# for all others only the path string is stored.
#
# @param index an index exposing +maf_file+, +path+ and +ref_seq+
# @param [String] maf path to the MAF file the index should describe
# @raise [RuntimeError] if the index was created for a differently
#   named MAF file
def register_index(index, maf)
  expected_name = File.basename(maf)
  unless index.maf_file == expected_name
    raise "Index #{index.path} was created for #{index.maf_file}, not #{expected_name}!"
  end
  chrom = index.ref_seq
  location = index.path.to_s
  @indices[chrom] = location.start_with?('%') ? index : location
  @maf_by_chrom[chrom] = maf
end
scan_dir(dir) click to toggle source

@api private

# File lib/bio/maf/index.rb, line 252
# @api private
#
# Opens every +.kct+ file in the directory and registers it when the
# MAF file it names (via +maf_file+) exists in the same directory.
# Each index is closed again after inspection, including when
# registration raises, so no database handle is left open.
#
# @param [String] dir directory to scan
def scan_dir(dir)
  Dir.glob("#{dir}/*.kct").each do |index_f|
    index = KyotoIndex.open(index_f)
    begin
      maf = "#{dir}/#{index.maf_file}"
      register_index(index, maf) if File.exist?(maf)
    ensure
      # Always release the handle, even if registration fails.
      index.close
    end
  end
end
slice(interval, &blk) click to toggle source

Find and parse all alignment blocks in the genomic region given by a Bio::GenomicInterval, and truncate them to just the region intersecting that interval.

@param [Bio::GenomicInterval] interval interval to search @yield [block] each {Block} matched, in turn @return [Array<Block>] each matching {Block}, if no block given @api public @see KyotoIndex#slice

# File lib/bio/maf/index.rb, line 182
# Find and parse all alignment blocks in the given genomic interval and
# truncate them to the region intersecting that interval, delegating to
# the index's own +slice+ with the current block filter.
#
# @param [Bio::GenomicInterval] interval interval to search
# @yield [block] each {Block} matched, in turn
# @return [Array<Block>] each matching {Block}, if no block given;
#   otherwise whatever the index's +slice+ returns
# @api public
# @see KyotoIndex#slice
def slice(interval, &blk)
  chrom = interval.chrom
  with_index(chrom) do |index|
    with_parser(chrom) do |parser|
      result = index.slice(interval, parser, block_filter, &blk)
      if block_given?
        result
      else
        # Materialize before the parser and index are released.
        result.to_a
      end
    end
  end
end
tile(interval) { |tiler| ... } click to toggle source

Find and parse all alignment blocks in the genomic region given by a Bio::GenomicInterval, and combine them to synthesize a single alignment covering that interval exactly.

@param [Bio::GenomicInterval] interval interval to search @yield [tiler] a {Tiler} ready to operate on the given interval @api public

# File lib/bio/maf/index.rb, line 161
# Prepare a {Tiler} for the given genomic interval and yield it. The
# tiler is wired to the index and parser for the interval's chromosome,
# both of which are only valid for the duration of the block.
#
# @param [Bio::GenomicInterval] interval interval to search
# @yield [tiler] a {Tiler} ready to operate on the given interval
# @return the value of the given block
# @api public
def tile(interval)
  chrom = interval.chrom
  with_index(chrom) do |index|
    with_parser(chrom) do |parser|
      tiler = Tiler.new.tap do |t|
        t.index = index
        t.parser = parser
        t.interval = interval
      end
      yield tiler
    end
  end
end
with_index(chrom) { |index| ... } click to toggle source
# File lib/bio/maf/index.rb, line 277
# Yields the index for the given chromosome and closes it afterwards,
# unless its path starts with '%' (such indexes are left open).
#
# @param chrom chromosome whose index is needed
# @yield [index] the selected index
# @return the value of the given block
def with_index(chrom)
  selected = chrom_index(chrom)
  LOG.debug { "Selected index #{selected} for sequence #{chrom}." }
  begin
    yield selected
  ensure
    keep_open = selected.path.to_s.start_with?('%')
    selected.close unless keep_open
  end
end
with_parser(chrom) { |parser| ... } click to toggle source

@api private

# File lib/bio/maf/index.rb, line 288
# @api private
#
# Creates a parser for the MAF file registered for the given chromosome,
# applies the current sequence filter, yields it, and closes it when the
# block finishes (normally or by raising).
#
# @param chrom chromosome whose MAF file should be parsed
# @yield [parser] the freshly created parser
# @return the value of the given block
def with_parser(chrom)
  LOG.debug { "Creating parser with options #{@parse_options.inspect}" }
  maf_parser = Parser.new(@maf_by_chrom[chrom], @parse_options).tap do |p|
    p.sequence_filter = self.sequence_filter
  end
  begin
    yield maf_parser
  ensure
    maf_parser.close
  end
end