class Bio::MAF::Access
Top-level class for working with a set of indexed MAF
files. Provides a higher-level alternative to working with {Parser} and {KyotoIndex} objects directly.
Instantiate with {Access.maf_dir} and {Access.file} methods.
Attributes
block_filter
Block filter to apply. @return [Hash] @see KyotoIndex#find
sequence_filter
Sequence filter to apply. @return [Hash] @see Parser#sequence_filter
Public Class Methods
Provides access to a single MAF
file. If this file is not indexed, it will be fully parsed to create a temporary in-memory index. For large MAF
files or ones which will be used multiple times, this is inefficient, and an index file should be created with maf_index(1).
@param [String] maf path to MAF
file @param [String] index Kyoto Cabinet index file @param [Hash] options parser options @return [Access]
# File lib/bio/maf/index.rb, line 108
# Builds an instance for a single MAF file, optionally paired with an
# explicit index file.
#
# @param [String] maf path to the MAF file, stored under :maf
# @param [String, nil] index index file path, stored under :index only
#   when truthy
# @param [Hash] options additional options; copied, never mutated
# @return [Access]
def self.file(maf, index=nil, options={})
  settings = options.merge(:maf => maf)
  settings[:index] = index if index
  new(settings)
end
Provides access to a directory of indexed MAF
files. Any files with .maf suffixes and accompanying .kct indexes in the given directory will be accessible. @param [String] dir directory to scan @param [Hash] options parser options @return [Access]
# File lib/bio/maf/index.rb, line 92
# Builds an instance for a directory of MAF files.
#
# @param [String] dir directory to scan, stored under :dir
# @param [Hash] options additional options; copied, never mutated
# @return [Access]
def self.maf_dir(dir, options={})
  settings = options.merge(:dir => dir)
  new(settings)
end
@api private
# File lib/bio/maf/index.rb, line 194
# Sets up the chromosome -> index and chromosome -> MAF file maps.
#
# With :dir, the directory is scanned via scan_dir. With :maf, the index
# named by :index is used if given; otherwise find_index_file looks for
# one next to the MAF file. If :maf was given and no index ended up
# registered, a temporary index is built by parsing the MAF file.
#
# @param [Hash] options parser options plus :dir, :maf and :index
# @raise [RuntimeError] if neither :dir nor :maf is given
def initialize(options)
  @parse_options = options
  @indices = {}
  @maf_by_chrom = {}
  if options[:dir]
    scan_dir(options[:dir])
  elsif options[:maf]
    index_path = options[:index]
    if index_path
      LOG.debug { "Opening index file #{options[:index]}" }
    else
      index_path = find_index_file(options[:maf])
    end
    if index_path
      index = KyotoIndex.open(index_path)
      begin
        register_index(index, options[:maf])
      ensure
        # Release the handle even when register_index rejects the index.
        index.close
      end
    end
  else
    raise "Must specify :dir or :maf!"
  end
  if options[:maf] && @indices.empty?
    # MAF file explicitly given but no index
    # build a temporary one
    # (could build a real one, too...)
    maf = options[:maf]
    parser = Parser.new(maf, @parse_options)
    LOG.warn { "WARNING: building temporary index on #{maf}." }
    index = KyotoIndex.build(parser, '%')
    register_index(index, maf)
  end
end
Public Instance Methods
@api private
# File lib/bio/maf/index.rb, line 264
# Looks up the index registered for a chromosome.
#
# @param chrom key into the registered indices
# @return a registered KyotoIndex instance as-is, or the result of
#   KyotoIndex.open on the registered value otherwise
# @raise [RuntimeError] if nothing is registered for chrom
def chrom_index(chrom)
  raise "No index available for chromosome #{chrom}!" unless @indices.has_key?(chrom)

  entry = @indices[chrom]
  # A stored KyotoIndex object is a temporary index: hand it back directly.
  return entry if entry.is_a?(KyotoIndex)

  KyotoIndex.open(entry)
end
Close all open resources, in particular Kyoto Cabinet database handles.
# File lib/bio/maf/index.rb, line 117
# Closes every registered index that is held as an open object.
#
# register_index stores on-disk indexes as path strings and only keeps
# index objects for those whose path starts with '%', so entries that do
# not respond to #close are skipped rather than raising NoMethodError.
def close
  @indices.values.each do |ki|
    ki.close if ki.respond_to?(:close)
  end
end
Find all alignment blocks in the genomic regions in the list of Bio::GenomicInterval
objects, and parse them with the given parser.
@param [Enumerable<Bio::GenomicInterval>] intervals genomic
intervals to parse.
@yield [block] each {Block} matched, in turn @return [Array<Block>] each matching {Block}, if no block given @api public @see KyotoIndex#find
# File lib/bio/maf/index.rb, line 131
# Finds blocks for a list of intervals, grouped by chromosome.
#
# Without a block, re-invokes itself with a collecting block and returns
# the collected array. With a block, first checks that every chromosome
# among the intervals has a registered index, then delegates each group
# to that chromosome's index, passing the parser and block_filter.
#
# @param intervals enumerable of objects responding to #chrom
# @yield [block] each matched block
# @raise [RuntimeError] if a chromosome has no registered index
def find(intervals, &blk)
  unless block_given?
    collected = []
    find(intervals) { |block| collected << block }
    return collected
  end

  grouped = intervals.group_by(&:chrom)
  # Validate all chromosomes up front so nothing is yielded before failing.
  grouped.each_key do |chrom|
    next if @indices.has_key?(chrom)
    raise "No index available for chromosome #{chrom}!"
  end
  grouped.each do |chrom, chrom_intervals|
    with_index(chrom) do |index|
      with_parser(chrom) do |parser|
        index.find(chrom_intervals, parser, block_filter, &blk)
      end
    end
  end
end
@api private
# File lib/bio/maf/index.rb, line 231
# Looks for an existing .kct index file next to a MAF file.
#
# Two candidates are tried in order, both in the MAF file's directory:
# the full base name plus ".kct", then the base name with everything from
# the first ".maf" onward removed plus ".kct".
#
# @param [String] maf path to the MAF file
# @return [String, nil] path of the first candidate that exists, or nil
def find_index_file(maf)
  dir = File.dirname(maf)
  base = File.basename(maf)
  noext = base.gsub(/\.maf.*/, '')
  [base, noext]
    .map { |name| File.join(dir, "#{name}.kct") }
    .find { |path| File.exist?(path) }
end
@api private
# File lib/bio/maf/index.rb, line 239
# Records an index and its MAF file under the index's reference sequence.
#
# An index whose path starts with '%' is stored as the index object
# itself; any other index is stored as its path string.
#
# @param index object responding to #maf_file, #path and #ref_seq
# @param [String] maf path to the MAF file
# @raise [RuntimeError] if the index's maf_file differs from the MAF
#   file's base name
def register_index(index, maf)
  maf_name = File.basename(maf)
  unless index.maf_file == maf_name
    raise "Index #{index.path} was created for #{index.maf_file}, not #{maf_name}!"
  end

  ref_seq = index.ref_seq
  path = index.path.to_s
  @indices[ref_seq] = path.start_with?('%') ? index : path
  @maf_by_chrom[ref_seq] = maf
end
@api private
# File lib/bio/maf/index.rb, line 252
# Registers every .kct index in a directory whose MAF file is present.
#
# Each index is opened to read its maf_file, registered when that file
# exists in the same directory, and then closed. The close runs in an
# ensure clause so the handle is released even if register_index raises.
#
# @param [String] dir directory to scan
def scan_dir(dir)
  Dir.glob("#{dir}/*.kct").each do |index_f|
    index = KyotoIndex.open(index_f)
    begin
      maf = "#{dir}/#{index.maf_file}"
      register_index(index, maf) if File.exist?(maf)
    ensure
      index.close
    end
  end
end
Find and parse all alignment blocks in the genomic region given by a Bio::GenomicInterval, and truncate them to just the region intersecting that interval.
@param [Bio::GenomicInterval] interval interval to search @yield [block] each {Block} matched, in turn @return [Array<Block>] each matching {Block}, if no block given @api public @see KyotoIndex#slice
# File lib/bio/maf/index.rb, line 182
# Delegates to the slice method of the index for the interval's
# chromosome, passing the parser and block_filter.
#
# @param interval object responding to #chrom
# @yield [block] forwarded to the index's slice method
# @return the index's result when a block is given, otherwise that result
#   converted with #to_a
def slice(interval, &blk)
  chrom = interval.chrom
  with_index(chrom) do |index|
    with_parser(chrom) do |parser|
      result = index.slice(interval, parser, block_filter, &blk)
      if block_given?
        result
      else
        result.to_a
      end
    end
  end
end
Find and parse all alignment blocks in the genomic region given by a Bio::GenomicInterval, and combine them to synthesize a single alignment covering that interval exactly.
@param [Bio::GenomicInterval] interval interval to search @yield [tiler] a {Tiler} ready to operate on the given interval @api public
# File lib/bio/maf/index.rb, line 161
# Yields a Tiler configured with the index, parser and interval for the
# interval's chromosome. The index and parser are only valid inside the
# block: with_index and with_parser release them afterwards.
#
# @param interval object responding to #chrom
# @yield [tiler] the configured Tiler
def tile(interval)
  chrom = interval.chrom
  with_index(chrom) do |index|
    with_parser(chrom) do |parser|
      configured = Tiler.new.tap do |t|
        t.index = index
        t.parser = parser
        t.interval = interval
      end
      yield configured
    end
  end
end
# File lib/bio/maf/index.rb, line 277
# Yields the index for a chromosome and closes it afterwards, unless its
# path starts with '%'.
#
# @param chrom chromosome key passed to chrom_index
# @yield [index] the selected index
def with_index(chrom)
  selected = chrom_index(chrom)
  LOG.debug { "Selected index #{selected} for sequence #{chrom}." }
  begin
    yield selected
  ensure
    # Indexes whose path starts with '%' are left open for reuse.
    keep_open = selected.path.to_s.start_with?('%')
    selected.close unless keep_open
  end
end
@api private
# File lib/bio/maf/index.rb, line 288
# Yields a Parser for the MAF file registered under a chromosome, with
# this object's sequence_filter applied, and closes it afterwards.
#
# @param chrom key into the chromosome -> MAF file map
# @yield [parser] the configured parser
def with_parser(chrom)
  LOG.debug { "Creating parser with options #{@parse_options.inspect}" }
  maf_path = @maf_by_chrom[chrom]
  parser = Parser.new(maf_path, @parse_options).tap do |p|
    p.sequence_filter = self.sequence_filter
  end
  begin
    yield parser
  ensure
    parser.close
  end
end