class MgNu::Sequence::Fasta

Attributes

header[RW]
header_description[RW]
header_name[RW]

Public Class Methods

new(options) click to toggle source

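Create a new MgNu::Sequence::Fasta object from an options hash; the :header entry is split into a name (first word) and a description (the rest).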

Calls superclass method MgNu::Sequence::new
# File lib/mgnu/sequence/fasta.rb, line 9
# Build a FASTA record from an options hash.
#
# options - Hash passed unchanged to the superclass initializer. The
#           :header entry (default nil) is the full FASTA header line
#           without the leading ">".
#
# Sets @header to the raw header, @header_name to its first
# whitespace-delimited word and @header_description to the remaining words
# joined by single spaces (nil when there are none).
def initialize(options)
  super(options)
  options = { :header => nil }.merge(options)
  @header = options[:header]
  # The default above allows a nil header, so coerce before splitting
  # instead of raising NoMethodError on nil.
  words = @header.to_s.split
  @header_name = words.shift
  @header_description = words.empty? ? nil : words.join(' ')
end

Public Instance Methods

sequence_by_columns(cols = 60) click to toggle source

Split the sequence into lines of at most cols characters each.

# File lib/mgnu/sequence/fasta.rb, line 19
# Break the sequence into newline-terminated lines of +cols+ characters.
#
# cols - maximum number of characters per line (default 60).
#
# Returns a new String. A sequence shorter than +cols+ is returned as a
# copy with no trailing newline; otherwise every line, including the last,
# ends with a newline.
def sequence_by_columns(cols = 60)
  seq = ''
  if length < cols
    seq << sequence
  else
    # Stop at the last valid index (length - 1). Stepping up to length
    # itself yields an empty slice whenever length is an exact multiple of
    # cols, which appended a spurious blank line to the output.
    0.step(length - 1, cols) { |offset| seq << sequence[offset, cols] << "\n" }
  end
  seq
end
split_on_n(min_n = 10) click to toggle source

Find runs of at least min_n consecutive N (or n) characters in the sequence and split the sequence at each run.

# File lib/mgnu/sequence/fasta.rb, line 58
# Split the sequence at every run of at least +min_n+ consecutive N/n
# characters and render each remaining piece as its own FASTA record.
#
# min_n - minimum run length that counts as a split point (default 10).
#
# Returns a String. When the split leaves two or more non-empty pieces,
# each piece is emitted as a ">name_i description" line followed by the
# piece on a single line, numbered from 1. Otherwise the result of to_s
# for the whole record is returned.
def split_on_n(min_n = 10)
  # String#split keeps a leading empty field when the sequence starts with
  # an N run; drop it so no empty record is emitted and numbering starts at
  # the first real piece.
  sequence_chunks = sequence.split(/[nN]{#{min_n},}/).reject(&:empty?)
  return to_s unless sequence_chunks.length > 1

  sequence_chunks.each_with_index.map do |chunk, i|
    ">#{@header_name}_#{i + 1} #{@header_description}\n#{chunk}\n"
  end.join
end
to_s(cols = 60) click to toggle source

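Override to_s to return the FASTA string representation: a ">" header line followed by the sequence, wrapped at cols characters (pass -1 to leave the sequence unwrapped).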

# File lib/mgnu/sequence/fasta.rb, line 30
# Render the record as FASTA text: a ">" line carrying the full header,
# followed by the sequence body.
#
# cols - line width for the body (default 60); -1 leaves the sequence on a
#        single line. Not used when the sequence holds quality scores.
#
# Returns a String.
def to_s(cols = 60)
  body =
    if sequence =~ /\d+\s+\d+/
      # Whitespace-separated numbers: treat as a quality record and lay the
      # scores out 17 per line, each line newline-terminated.
      sequence.split(/\s+/).each_slice(17).map { |row| "#{row.join(' ')}\n" }.join
    elsif cols == -1 || length < cols
      # Unwrapped on request, or already short enough to fit on one line.
      sequence
    else
      sequence_by_columns(cols)
    end
  ">#{@header}\n#{body}"
end