class Bio::MAF::Block
A MAF alignment block.
@api public
Constants
- GAP
Attributes
Offset of the alignment block within the MAF file, in bytes.
@return [Integer]
Original text of the MAF block. Only available if the :retain_text parser option is set.
@return [String]
Sequences, one per ‘s’ or ‘e’ line. @return [Array<Sequence>]
Size of the alignment block within the MAF file, in bytes.
@return [Integer]
Parameters from the ‘a’ line starting the alignment block.
Public Class Methods
# File lib/bio/maf/maf.rb, line 70
# Creates a block by storing each argument in the instance variable of the
# same name.
#
# @param vars      stored as @vars; copied with #dup and given a :score key
#                  by #join, so presumably a Hash of 'a'-line parameters
# @param sequences [Array<Sequence>] stored as @sequences
# @param offset    [Integer] stored as @offset
# @param size      [Integer, nil] stored as @size (#join passes nil)
# @param filtered  [Boolean] stored as @filtered and reported by #filtered?
def initialize(vars, sequences, offset, size, filtered)
  @vars, @sequences = vars, sequences
  @offset, @size = offset, size
  @filtered = filtered
end
Public Instance Methods
# File lib/bio/maf/maf.rb, line 177
# Builds a new Block in which every sequence has been cut down to the text
# columns that #_slice_text_range reports for +interval+.
#
# The new block receives a copy of this block's vars and reuses this
# block's offset, size and filtered flag unchanged.
#
# @param interval handed to #_slice_text_range to obtain the column range
# @return [Block] the sliced block
def _slice(interval)
  columns = _slice_text_range(interval)
  sliced = sequences.map { |seq| seq.slice(columns) }
  # The score is carried over as-is; resetting it (v2[:score] = '0.0') was
  # left disabled in the original code.
  # TODO: should the filtered param be #modified? instead?
  Block.new(vars.dup, sliced, offset, size, @filtered)
end
# File lib/bio/maf/maf.rb, line 186
# Translates +interval+ into a range of text columns of the reference
# sequence.
#
# Walks the reference sequence text one character at a time, counting only
# non-'-' characters as positions (starting from ref_seq.start). The column
# where the count equals interval.zero_start opens the range; the column
# after which the count reaches interval.zero_end closes it.
#
# @param interval object responding to #zero_start and #zero_end
# @return [Range] end-exclusive range of text column indexes
# @raise [RuntimeError] if either boundary is never reached
def _slice_text_range(interval)
  first_pos = interval.zero_start
  stop_pos = interval.zero_end
  genome_pos = ref_seq.start
  col_from = nil
  col_to = nil
  ref_seq.text.each_char.each_with_index do |char, col|
    # Gap columns do not advance the position counter.
    next if char == '-'

    col_from = col if genome_pos == first_pos
    genome_pos += 1
    if col_from && genome_pos == stop_pos
      col_to = col + 1
      break
    end
  end
  if col_from.nil? || col_to.nil?
    raise "did not find start and end for #{interval} in #{ref_seq.inspect}!"
  end
  col_from...col_to
end
# File lib/bio/maf/maf.rb, line 86
# Yields each entry of +sequences+ in order.
#
# When called without a block an Enumerator over the same entries is
# returned, instead of failing with LocalJumpError on the first yield.
#
# @yield [seq] each element of +sequences+
# @return [Enumerator] when no block is given
# @return the value of +sequences.each+ when a block is given
def each_raw_seq
  return to_enum(:each_raw_seq) unless block_given?

  sequences.each { |seq| yield seq }
end
Whether this block has been modified by a parser filter. @return [Boolean]
# File lib/bio/maf/maf.rb, line 103
# Reports the filtered flag that was handed to the constructor, i.e.
# whether this block has been modified by a parser filter.
#
# @return [Boolean] the stored @filtered value, returned as-is
def filtered?
  @filtered
end
Find gaps present in all sequences. These would generally occur when some sequences have been filtered out.
@see remove_gaps!
@see Parser#sequence_filter
# File lib/bio/maf/maf.rb, line 126
# Finds gaps that are present in every non-empty sequence of the block.
#
# The first sequence is scanned for matches of GAP. At each match the other
# non-empty sequences are checked for a GAP match starting at the same text
# offset; if all of them match, the shortest of the matched lengths is
# recorded.
#
# Two failure modes of the earlier version are handled:
# * with no other non-empty sequence (for instance when only the first
#   sequence is left) the gap size is simply the first sequence's match
#   length, instead of raising ArgumentError from comparing with nil;
# * an empty sequence list yields [] instead of raising NoMethodError.
#
# NOTE(review): a shared gap is only detected when it begins at the same
# column as a GAP match in the first sequence; if GAP matches whole runs, a
# shared gap starting in the middle of such a run is not reported.
#
# @return [Array<Array(Integer, Integer)>] [offset, length] pairs in text
#   order
def find_gaps
  return [] if sequences.empty?

  ref_s = StringScanner.new(sequences.first.text)
  others = sequences.drop(1)
                    .reject { |s| s.empty? }
                    .map { |s| StringScanner.new(s.text) }
  gaps = []
  while ref_s.scan_until(GAP)
    offset = ref_s.pos - ref_s.matched_size
    # Line every other scanner up with the start of the reference gap.
    others.each { |s| s.pos = offset }
    next unless others.all? { |s| s.scan(GAP) }

    # Splatting keeps this well-defined when +others+ is empty.
    gap_size = [ref_s.matched_size, *others.map { |s| s.matched_size }].min
    gaps << [offset, gap_size]
  end
  gaps
end
# File lib/bio/maf/maf.rb, line 229
# Builds a new Block by joining each of this block's sequences with the
# sequence of +other+ that has the same source.
#
# The partner sequence is looked up with other.seq_from, using this
# sequence's index as the position guess. The new block gets a copy of this
# block's vars with :score set to '0.0', this block's offset and filtered
# flag, and a nil size.
#
# @param other block providing #seq_from
# @return [Block] the joined block
def join(other)
  joined = sequences.each_with_index.map do |mine, idx|
    mine.join(other.seq_from(mine.source, idx))
  end
  merged_vars = vars.dup
  merged_vars[:score] = '0.0'
  Block.new(merged_vars, joined, offset, nil, @filtered)
end
# File lib/bio/maf/maf.rb, line 211
# Tells whether +other+ can be joined onto the end of this block.
#
# All of the following must hold:
# * both blocks have the same number of sequences;
# * the reference sequences have the same source;
# * this block's reference sequence ends where the other's starts;
# * every sequence of this block has a counterpart with the same source in
#   +other+ (found via other.seq_from) that it reports as joinable.
#
# The earlier version called Enumerator#next before Enumerable#find,
# apparently to skip the reference sequence. External enumeration does not
# move internal iteration, so the call had no effect (besides allocating a
# Fiber) and every sequence, the reference included, was checked. That
# effective behaviour is kept here; only the no-op is removed.
#
# @param other block responding to #sequences, #ref_seq and #seq_from
# @return [Boolean]
def joinable_with?(other)
  return false unless sequences.size == other.sequences.size

  r1 = ref_seq
  r2 = other.ref_seq
  return false if r1.source != r2.source
  return false if r1.end != r2.start

  sequences.each_with_index.all? do |s1, i|
    s2 = other.seq_from(s1.source, i)
    # Double negation keeps the result a strict Boolean.
    !!(s2 && s1.joinable_with?(s2))
  end
end
# File lib/bio/maf/maf.rb, line 82
# Looks up the sequence at position +i+ using +sequences.fetch+, so an
# index with no entry raises instead of returning nil (given that
# +sequences+ is an Array).
#
# @param i [Integer] index into +sequences+
# @return the sequence stored at that index
def raw_seq(i)
  sequences.fetch(i)
end
# File lib/bio/maf/maf.rb, line 78
# The reference sequence of the block: the first entry of +sequences+.
#
# @return the first sequence, or nil when there are none
def ref_seq
  sequences.first
end
Remove gaps present in all sequences. These would generally occur when some sequences have been filtered out.
@see find_gaps
@see Parser#sequence_filter
# File lib/bio/maf/maf.rb, line 148
# Deletes from every sequence the text spans reported by #find_gaps.
#
# Gaps are processed from last to first so that removing one span does not
# shift the offsets of the spans still to be removed.
#
# @return [Integer] the number of gaps that were removed
def remove_gaps!
  found = find_gaps
  found.reverse_each do |offset, len|
    sequences.each { |seq| seq.delete_text(offset, len) }
  end
  found.size
end
# File lib/bio/maf/maf.rb, line 239
# Finds the sequence whose source equals +src+.
#
# The entry at index +pos_guess+ is tried first; if it is absent (index out
# of range) or has a different source, all sequences are searched. An
# out-of-range guess used to raise NoMethodError on nil; it now simply
# falls through to the search.
#
# @param src       source identifier to look for
# @param pos_guess [Integer] index at which the sequence is expected
# @return the matching sequence, or nil if no sequence has that source
def seq_from(src, pos_guess)
  guess = sequences[pos_guess]
  return guess if guess && guess.source == src

  sequences.find { |s| s.source == src }
end
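Returns a Block covering only the region where this block overlaps the given interval. When the interval exactly equals the reference sequence's interval, the block itself is returned rather than a new one.
@param [Bio::GenomicInterval] interval to slice the block with
@return [Block] block covering intersection with interval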
# File lib/bio/maf/maf.rb, line 162
# Restricts the block to the part that overlaps +interval+.
#
# The outcome depends on how +interval+ compares with the reference
# sequence's interval:
# * :equal - the block itself is returned;
# * any overlapping relation - a new block covering the intersection is
#   built with #_slice;
# * adjacent, disjoint or different-chromosome relations - an error is
#   raised.
#
# @param interval object responding to #compare, #intersection and #chrom
# @return [Block] this block or a sliced copy
# @raise [RuntimeError] when the interval does not overlap the block, lies
#   on another chromosome, or the comparison result is not recognised
def slice(interval)
  ref_interval = ref_seq.interval
  relation = interval.compare(ref_interval)
  case relation
  when :equal
    self
  when :contains, :contained_by, :left_overlapped, :right_overlapped
    _slice(interval.intersection(ref_interval))
  when :left_adjacent, :right_adjacent, :left_off, :right_off
    raise "Cannot slice a block with a non-overlapping interval! Block #{ref_interval}, interval #{interval}"
  when :different_chrom
    raise "Cannot slice a block with reference sequence #{ref_seq.source} using an interval on #{interval.chrom}!"
  else
    raise "Unhandled comparison result: #{relation}"
  end
end
Text size of the alignment block. This is the number of text characters in each line of sequence data, including dashes and other gaps in the sequence.
# File lib/bio/maf/maf.rb, line 93
# Number of text characters in the first sequence's text, gap characters
# included.
#
# @return [Integer] length of the first sequence's text
def text_size
  first_seq = sequences.first
  first_seq.text.size
end
# File lib/bio/maf/maf.rb, line 107
# Converts the block into a Bio::BioAlignment::Alignment built from each
# sequence's own #to_bio_alignment result.
#
# @return [Bio::BioAlignment::Alignment]
def to_bio_alignment
  converted = sequences.map(&:to_bio_alignment)
  Bio::BioAlignment::Alignment.new(converted)
end
# File lib/bio/maf/maf.rb, line 112
# Renders the block as text by having a Writer write it into an in-memory
# buffer.
#
# @return [String] everything the Writer wrote for this block
def to_s
  out = StringIO.new
  Writer.new(out).write_block(self)
  out.string
end
# File lib/bio/maf/maf.rb, line 97
# Calls #upcase! on every sequence of the block.
#
# @return the value of +sequences.each+ (the sequence list itself for an
#   Array)
def upcase!
  sequences.each(&:upcase!)
end