class MgNu::Sequence

Attributes

sequence[RW]
sequence=[RW]
type[RW]
value[RW]

Public Class Methods

new(options) click to toggle source
# File lib/mgnu/sequence.rb, line 9
# Builds a sequence from an options hash.
#
# The keys read are :value, :sequence and :type. :value and :type fall back to
# nil when they are absent; when a :sequence key is present its value is used
# for @value in preference to :value.
def initialize(options)
  settings = { :value => nil, :type => nil }.merge(options)
  @value = settings.key?(:sequence) ? settings[:sequence] : settings[:value]
  @type = settings[:type]
end

Public Instance Methods

aa?() click to toggle source
# File lib/mgnu/sequence.rb, line 27
# True when @type is one of the strings 'aa', 'aminoacid' or 'protein',
# false for anything else.
def aa?
  %w[aa aminoacid protein].any? { |label| @type == label }
end
Also aliased as: protein?, aminoacid?
aminoacid?()
Alias for: aa?
complement() click to toggle source

returns a string

# File lib/mgnu/sequence.rb, line 39
# Returns the complement of @value as a new string; @value itself is not
# modified.
#
# When @type is 'rna' the pairs a<->u and c<->g are swapped; for any other
# type the DNA pairs a<->t and c<->g are used. The ambiguity codes r<->y,
# m<->k, d<->h and v<->b are swapped in both cases. Case is preserved and
# characters outside the table pass through unchanged.
def complement
  if @type == 'rna'
    # 'a' must be part of the source alphabet so that adenine pairs with
    # uracil (and vice versa).
    @value.tr('acgurymkdhvbACGURYMKDHVB', 'ugcayrkmhdbvUGCAYRKMHDBV')
  else
    @value.tr('acgtrymkdhvbACGTRYMKDHVB', 'tgcayrkmhdbvTGCAYRKMHDBV')
  end
end
complement!() click to toggle source

changes sequence @value

# File lib/mgnu/sequence.rb, line 48
# Replaces @value with the result of #complement and returns the new value.
def complement!
  complemented = complement
  @value = complemented
end
distance(other)
dna?() click to toggle source
# File lib/mgnu/sequence.rb, line 23
# True when @type equals the string 'dna', false otherwise.
def dna?
  return true if @type == 'dna'

  false
end
gc_content() click to toggle source
# File lib/mgnu/sequence.rb, line 188
# Computes the GC fraction of @value.
#
# Returns -1 when @value is nil or empty, or when it holds no countable base
# (for example a sequence made only of N or '*'). Otherwise returns the
# fraction as a String formatted with four decimals, e.g. '0.5000'.
#
# '*' and N/n are skipped. Any character that is not a known nucleotide or
# ambiguity code is reported on $stderr and left out of the total.
def gc_content
  return -1 if @value == '' || @value.nil?

  known = %w[A C G T U R Y M K W S B D H V]
  # NOTE(review): R, K, B, D and V are counted towards GC while Y, M and H
  # are not; this set is kept as it was -- confirm it is intended.
  gc_codes = %w[G C R K S B D V]

  counts = Hash.new(0)
  @value.each_char do |char|
    base = char.upcase
    next if base == '*' || base == 'N'

    if known.include?(base)
      counts[base] += 1
    else
      $stderr.puts "Unknown character #{base}"
    end
  end

  total = counts.values.inject(0) { |sum, count| sum + count }
  # Nothing countable: avoid 0/0, which would be formatted as "NaN".
  return -1 if total.zero?

  gc = gc_codes.inject(0) { |sum, code| sum + counts[code] }
  format('%.4f', gc.to_f / total)
end
identity(other)
Alias for: percent_identity
length() click to toggle source
# File lib/mgnu/sequence.rb, line 34
# Length of @value, or nil when @value is nil.
def length
  @value.length unless @value.nil?
end
levenshtein_distance(other) click to toggle source
# File lib/mgnu/sequence.rb, line 126
# Case-insensitive Levenshtein (edit) distance between @value and other.
#
# other may be a MgNu::Sequence (its #value is compared) or a String;
# subclasses of either are accepted.
#
# Returns -1 when either side is nil or empty, or when other is neither a
# sequence nor a string; 0 when both are equal ignoring case; otherwise the
# number of single-character insertions, deletions and substitutions.
def levenshtein_distance(other)
  return -1 if @value.nil? || @value == ''

  target = case other
           when MgNu::Sequence then other.value
           when String then other
           end
  return -1 if target.nil? || target == ''

  a = @value.upcase
  b = target.upcase
  return 0 if a == b

  # Only the previous and the current row of the edit matrix are needed.
  previous = (0..b.length).to_a
  1.upto(a.length) do |x|
    current = [x]
    1.upto(b.length) do |y|
      cost = a[x - 1, 1] == b[y - 1, 1] ? 0 : 1
      current[y] = [previous[y] + 1, current[y - 1] + 1, previous[y - 1] + cost].min
    end
    previous = current
  end
  previous[-1]
end
Also aliased as: distance
nblocks(length = 10) click to toggle source

Returns an array of 1-based position ranges obtained by splitting the sequence on runs of N that are at least `length` characters long.

# File lib/mgnu/sequence.rb, line 114
# Splits the sequence on runs of N/n that are at least +length+ characters
# long and returns the 1-based position ranges of the pieces in between.
#
# The piece after the last run (or the whole sequence when there is no run)
# is always appended, so the result holds at least one range.
def nblocks(length = 10)
  scanner = StringScanner.new(value)
  ranges = []
  start = 1
  loop do
    break unless scanner.scan_until(/[Nn]{#{length},}/)

    run_end = scanner.pos
    # The piece ends right before the N-run that was just matched.
    ranges.push(start..(run_end - scanner.matched.length))
    start = run_end + 1
  end
  ranges.push(start..value.length)
end
percent_identity(other) click to toggle source
# File lib/mgnu/sequence.rb, line 158
# Fraction of positions at which @value and other carry the same character,
# compared position by position and ignoring case.
#
# other may be a MgNu::Sequence (its #value is compared) or a String;
# subclasses of either are accepted.
#
# Returns -1 when either side is nil or empty, or when other is neither a
# sequence nor a string; 1.0 when both strings are exactly equal; otherwise
# the number of matching positions divided by the longer of the two lengths.
# A warning is emitted when the lengths differ.
def percent_identity(other)
  return -1 if @value.nil? || @value == ''

  a = @value
  b = case other
      when MgNu::Sequence then other.value
      when String then other
      end
  return -1 if b.nil? || b == ''
  return 1.0 if a == b

  if a.length != b.length
    warn('lengths differ - percent identity may is probably inaccurate')
  end

  # Only positions present in both strings can match; walking past the end
  # of the shorter one would index into nil.
  upper_a = a.upcase
  upper_b = b.upcase
  overlap = [a.length, b.length].min
  matches = (0...overlap).select { |i| upper_a[i, 1] == upper_b[i, 1] }.length

  matches / [a.length, b.length].max.to_f
end
Also aliased as: identity
protein?()
Alias for: aa?
revcomp()
Alias for: reverse_complement
revcomp!()
Alias for: reverse_complement!
reverse_complement() click to toggle source
# File lib/mgnu/sequence.rb, line 52
# Returns the result of #complement reversed; @value is not modified here.
def reverse_complement
  complemented = complement
  complemented.reverse
end
Also aliased as: revcomp
reverse_complement!() click to toggle source
# File lib/mgnu/sequence.rb, line 57
# Replaces @value with the reversed result of #complement and returns the new
# value.
def reverse_complement!
  flipped = complement.reverse
  @value = flipped
end
Also aliased as: revcomp!
rna?() click to toggle source
# File lib/mgnu/sequence.rb, line 19
# True when @type equals the string 'rna', false otherwise.
def rna?
  return true if @type == 'rna'

  false
end
to_genbank() click to toggle source

Genbank formatted sequence 6 cols w/10 letters each, right justified line numbers

  1 tcctgatctc ctttatagca ctttccgtga aaattgccaa gcgacctgca tgagttccgg
 61 gagcgagaac ttctgcattt aactcacgag gagtaacaat atccactcca ggcagattcc
121 tgaaaccctt cagaacatta tccttgttgg atacaactat caaaacgctc ttctttttct

# File lib/mgnu/sequence.rb, line 102
# Formats @value as GenBank-style sequence lines and returns the result as a
# String.
#
# Each output line holds up to 60 characters split into groups of 10, each
# group preceded by a single space. The line starts with the 1-based position
# of its first character, right-justified in a 9-character field, and ends
# with a newline.
def to_genbank
  position = 1
  @value.gsub(/.{1,60}/) do |chunk|
    grouped = chunk.gsub(/.{1,10}/, ' \0')
    # Double quotes are required here so that \n is a real line break.
    line = format("%9d%s\n", position, grouped)
    position += 60
    line
  end
end
to_s(cols = 60) click to toggle source
# File lib/mgnu/sequence.rb, line 88
def to_s(cols = 60)
  seq = ''
  if @value.length < cols
    seq = @value
  else
    0.step(@value.length, cols) { |segment| seq += @value[segment, cols] + "\n" }
  end
  seq
end
translate(frame = 1, cdn_table = MgNu::BACTERIA_CODONS) click to toggle source
# File lib/mgnu/sequence.rb, line 62
# Translates @value codon by codon using +cdn_table+ and returns the result
# as a new String; @value is not modified.
#
# frame selects the reading frame: 1, 2, 3 read @value starting at offset
# 0, 1, 2; 4, 5, 6 and -1, -2, -3 read the result of #reverse_complement at
# offset 0, 1, 2. Any other value prints a notice on $stderr and reads
# @value at offset 0.
#
# Codons are looked up in lower case; a codon missing from the table becomes
# 'X'. Trailing characters that do not fill a codon are dropped, and a
# sequence too short to hold one codon in the chosen frame yields ''.
def translate(frame = 1, cdn_table = MgNu::BACTERIA_CODONS)
  sequence = @value
  from =
    case frame
    when 1, 2, 3
      frame - 1
    when 4, 5, 6
      sequence = reverse_complement
      frame - 4
    when -1, -2, -3
      sequence = reverse_complement
      -1 - frame
    else
      $stderr.puts 'unknown frame - defaulting to zero (0)'
      0
    end

  nalen = sequence.length - from
  # Fewer than three characters left (possibly a negative count when the
  # offset lies beyond the end): nothing to translate.
  return '' if nalen < 3

  nalen -= nalen % 3
  sequence[from, nalen].downcase.gsub(/.{3}/) { |codon| cdn_table[codon] || 'X' }
end
translate!(frame = 1, cdn_table = MgNu::BACTERIA_CODONS) click to toggle source
# File lib/mgnu/sequence.rb, line 84
# Replaces @value with the result of #translate for the given frame and codon
# table, and returns the new value.
def translate!(frame = 1, cdn_table = MgNu::BACTERIA_CODONS)
  translated = translate(frame, cdn_table)
  @value = translated
end