class Bio::MAF::Sequence
A sequence within an alignment block. @api public
Constants
- I_STATUS
Attributes
Array of raw synteny information from ‘i’ line. @return [Array<String>]
Quality string from ‘q’ line. @return [String]
@return [Integer] Size of aligning region in source sequence.
@return [String] Source sequence name.
Size of the entire source sequence, not just the aligning region. @return [Integer]
Size of the entire source sequence, not just the aligning region. @return [Integer]
@return [Integer] Zero-based start position.
:+ or :-, indicating which strand the alignment is to. @return [Symbol]
Sequence data for the alignment, including insertions. @return [String]
Public Class Methods
# File lib/bio/maf/maf.rb, line 277
# Stores each argument in the instance variable of the same name.
#
# @param source [String] source sequence name
# @param start [Integer] zero-based start position
# @param size [Integer] size of the aligning region in the source sequence
# @param strand [Symbol] :+ or :-
# @param src_size [Integer] size of the entire source sequence
# @param text [String] sequence data for the alignment, including insertions
def initialize(source, start, size, strand, src_size, text)
  @source, @start, @size, @strand, @src_size = source, start, size, strand, src_size
  @text = text
end
Public Instance Methods
# File lib/bio/maf/maf.rb, line 332
# Looks up a status character in the I_STATUS table.
#
# @param c the status character to decode
# @return the I_STATUS entry for +c+
# @raise [RuntimeError] if I_STATUS has no (truthy) entry for +c+
def decode_status_char(c)
  status = I_STATUS[c]
  return status if status

  raise("Unsupported status character #{c}!")
end
# File lib/bio/maf/maf.rb, line 365
# Removes +len+ characters starting at +offset+ from the alignment text, in
# place, and from the quality string as well when one is present. Does
# nothing when the sequence reports itself as empty.
#
# @param offset [Integer] string offset at which to start deleting
# @param len [Integer] number of characters to delete
def delete_text(offset, len)
  return if empty?

  text.slice!(offset, len)
  quality.slice!(offset, len) if quality
end
Whether this sequence is empty. Only true for {EmptySequence} instances from ‘e’ lines.
# File lib/bio/maf/maf.rb, line 315
# A regular sequence is never empty, so this always answers false.
#
# @return [Boolean] always false
def empty?
  false
end
# File lib/bio/maf/maf.rb, line 286
# Position just past the aligning region: the start position plus the size
# of the aligning region.
#
# @return [Integer] start + size
def end
  first = start
  first + size
end
# File lib/bio/maf/maf.rb, line 382
# Builds a description string of the form "source:start-end", where end is
# start + size.
#
# @return [String]
def fasta_desc
  stop = start + size
  "#{source}:#{start}-#{stop}"
end
# File lib/bio/maf/maf.rb, line 319
# Whether the alignment text length differs from the size of the aligning
# region.
#
# @return [Boolean] true when text.size and size are not equal
def gapped?
  text.size != size
end
# File lib/bio/maf/maf.rb, line 290
# Builds a GenomicInterval from this sequence's source name, start position
# and end position via GenomicInterval.zero_based.
#
# @return the object returned by GenomicInterval.zero_based
def interval
  GenomicInterval.zero_based(source, start, self.end)
end
# File lib/bio/maf/maf.rb, line 392
# Returns a new Sequence that appends +o+ to this one: same source, start,
# strand and src_size as the receiver, with the sizes summed and the texts
# concatenated. Quality strings are concatenated only when both sequences
# have one; otherwise the result carries no quality.
#
# @param o the sequence to append; must respond to size, text and quality
# @return [Sequence] the combined sequence
def join(o)
  joined = Sequence.new(source, start, size + o.size, strand, src_size, text + o.text)
  joined.quality = quality + o.quality if quality && o.quality
  joined
end
# File lib/bio/maf/maf.rb, line 386
# Whether +o+ can be appended directly to this sequence: it must start where
# this one ends, be on the same strand, and agree on emptiness.
#
# @param o the candidate sequence; must respond to start, strand and empty?
# @return [Boolean]
def joinable_with?(o)
  return false unless self.end == o.start
  return false unless self.strand == o.strand

  self.empty? == o.empty?
end
# File lib/bio/maf/maf.rb, line 344
# Second field of the raw synteny data, converted with to_i.
#
# @return [Integer, nil] nil when no synteny data is present
def left_count
  synteny = i_data
  synteny && synteny[1].to_i
end
# File lib/bio/maf/maf.rb, line 340
# Decoded form of the left status character, obtained by passing
# left_status_char through decode_status_char.
#
# @return the decoded status, or nil when no synteny data is present
def left_status
  synteny = i_data
  synteny && decode_status_char(left_status_char)
end
# File lib/bio/maf/maf.rb, line 336
# First field of the raw synteny data, undecoded.
#
# @return the raw left status character, or nil when no synteny data is
#   present
def left_status_char
  synteny = i_data
  synteny && synteny[0]
end
# File lib/bio/maf/maf.rb, line 356
# Fourth field of the raw synteny data, converted with to_i.
#
# @return [Integer, nil] nil when no synteny data is present
def right_count
  synteny = i_data
  synteny && synteny[3].to_i
end
# File lib/bio/maf/maf.rb, line 352
# Decoded form of the right status character, obtained by passing
# right_status_char through decode_status_char.
#
# @return the decoded status, or nil when no synteny data is present
def right_status
  synteny = i_data
  synteny && decode_status_char(right_status_char)
end
# File lib/bio/maf/maf.rb, line 348
# Third field of the raw synteny data, undecoded.
#
# @return the raw right status character, or nil when no synteny data is
#   present
def right_status_char
  synteny = i_data
  synteny && synteny[2]
end
# File lib/bio/maf/maf.rb, line 294
# Extracts the given range of text columns as a new Sequence.
#
# The new start is this sequence's start advanced by the number of non-gap
# characters preceding the range, and the new size is the number of non-gap
# characters inside it. Quality data, when present, is sliced with the same
# range. Synteny data is not carried over.
#
# @param range [Range] string offsets into the alignment text
# @return [Sequence] the sliced sequence
# @raise [RuntimeError] if the range cannot be extracted from the text
def slice(range)
  leading = text.slice(0...(range.begin))
  bases_before = leading.delete("-").size
  piece = text.slice(range)
  raise "could not extract slice #{range} from #{self.inspect}!" unless piece

  piece_bases = piece.delete("-").size
  sliced = Sequence.new(source, start + bases_before, piece_bases, strand, src_size, piece)
  sliced.quality = quality.slice(range) if quality
  # TODO: what to do with synteny data?
  sliced
end
# File lib/bio/maf/maf.rb, line 360
# The part of the source name before its first '.', if there is one.
#
# @return [String, nil] the prefix before the first dot, or nil when the
#   source name contains no dot
def species
  prefix, remainder = source.split('.', 2)
  remainder ? prefix : nil
end
Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from text.
@see String#slice
# File lib/bio/maf/maf.rb, line 410 def text_range(range) r_end = range.exclude_end? ? range.end : range.end + 1 r_size = r_end - range.begin if range.begin == start && r_size == size # special case, entire text 0...text.size else if range.begin < start || r_end > self.end raise "Range #{range} outside sequence bounds; start #{start}, size #{size}" end if ! gapped? # no gaps, can map indexes directly (range.begin - start)...(r_end - start) else # gaps present g_start = start # genomic position of the start t_start = 0 # text position of the start m_begin = nil # beginning of match match = nil text.scan(/(\w+|-+)/) do |parts| part = parts[0] if part[0] != '-' # sequence text g_end = g_start + part.size if g_start <= range.begin && range.begin < g_end offset_in_part = range.begin - g_start m_begin = offset_in_part + t_start end if g_start <= r_end && r_end <= g_end raise "reached end before start!" unless m_begin offset_in_part = r_end - g_start m_end = offset_in_part + t_start match = m_begin...m_end break end g_start = g_end else # gap end t_start += part.size end raise "no match found!" unless match return match end end end
# File lib/bio/maf/maf.rb, line 378
# Wraps this sequence's source name and alignment text in a new
# Bio::BioAlignment::Sequence.
#
# @return [Bio::BioAlignment::Sequence]
def to_bio_alignment
  name = source
  Bio::BioAlignment::Sequence.new(name, text)
end
# File lib/bio/maf/maf.rb, line 374
# Upcases the alignment text in place.
#
# @return whatever text.upcase! returns (for a String, nil when nothing
#   changed)
def upcase!
  alignment_text = text
  alignment_text.upcase!
end