class SequenceServer::Sequence::Retriever

Retrieve sequences from BLAST databases.

Attributes

database_ids[R]
in_file[R]
sequence_ids[R]
sequences[R]

Public Class Methods

new(sequence_ids, database_ids, in_file = false) click to toggle source
# File lib/sequenceserver/sequence.rb, line 178
# Stores the request parameters and, once the database ids pass validation,
# runs the retrieval.
#
# sequence_ids - a single sequence id or a list of them.
# database_ids - a single database id or a list of them.
# in_file      - flag stored as-is and exposed through the in_file reader.
def initialize(sequence_ids, database_ids, in_file = false)
  # Array() lets callers pass either one id or a collection of ids.
  @sequence_ids = Array(sequence_ids)
  @database_ids = Array(database_ids)
  @in_file      = in_file

  # validate either returns true or raises, so run is only reached for
  # acceptable database ids.
  run if validate
end

Public Instance Methods

to_json(*_args) click to toggle source
# File lib/sequenceserver/sequence.rb, line 188
# Serialises the retrieval result as a JSON object with two keys:
# error_msgs (see error_msgs) and sequences (the info of each sequence).
#
# Any arguments are accepted and ignored.
def to_json(*_args)
  payload = {
    error_msgs: error_msgs,
    sequences: sequences.map { |sequence| sequence.info }
  }
  payload.to_json
end

Private Instance Methods

database_names() click to toggle source
# File lib/sequenceserver/sequence.rb, line 214
# Returns the name of every database looked up by database_ids.
def database_names
  selected = Database[database_ids]
  selected.map { |database| database.name }
end
database_titles() click to toggle source
# File lib/sequenceserver/sequence.rb, line 218
# Returns the title of every database looked up by database_ids.
def database_titles
  selected = Database[database_ids]
  selected.map { |database| database.title }
end
error_msgs() click to toggle source

rubocop:disable Metrics/MethodLength

# File lib/sequenceserver/sequence.rb, line 232
      # Builds the error report that to_json includes in its output.
      #
      # Returns an empty array when the number of retrieved sequences equals
      # the number of requested sequence ids. Otherwise returns an array
      # holding a single [title, message] pair that describes the mismatch.
      def error_msgs
        return [] if sequences.length == sequence_ids.length

        [
          ['ERROR: incorrect number of sequences found.',
           <<~MSG
             You requested #{sequence_ids.length} sequence(s) with the following
             identifiers:
               #{sequence_ids.join(', ')}
             from the following databases:
               #{database_titles.join(', ')}
             but we found #{sequences.length} sequence(s).

             This is likely due to a problem with how databases are formatted.
             Please share this text with the person managing this website.

             If you are the admin and are confident that your databases are
             correctly formatted, you have likely encountered a weird bug.
             In this case, please raise an issue at:
             https://github.com/wurmlab/sequenceserver/issues

             If any sequences were retrieved, you can find them below
             (but some may be incorrect, so be careful!)
           MSG
          ]
        ]
      end
run() click to toggle source
# File lib/sequenceserver/sequence.rb, line 197
# Retrieves the requested sequences by invoking `blastdbcmd` through sys and
# stores one Sequence per output line in @sequences.
#
# When in_file is truthy, the instance is additionally extended with IO and
# write is called.
def run
  # Loaded here so the method does not rely on the file's require list.
  require 'shellwords'

  # Fields are tab-delimited. The tabs are written as explicit \t escapes so
  # the output format and the split below cannot drift apart when editors or
  # tooling expand literal tab characters into spaces.
  #
  # Database names and sequence ids are shell-escaped: interpolating them
  # between bare single quotes would let a value containing ' break out of
  # the quoting.
  command = "blastdbcmd -outfmt '%g\t%i\t%a\t%t\t%s'" \
            " -db #{Shellwords.escape(database_names.join(' '))}" \
            " -entry #{Shellwords.escape(sequence_ids.join(','))}"

  out, = sys(command, path: config[:bin])

  @sequences = out.each_line.map do |line|
    # Stop codons in amino acid sequence databases show up as invalid
    # UTF-8 characters in the output and cause the subsequent call to
    # `split` to fail. We replace invalid UTF-8 characters with X.
    line = line.encode('UTF-8', invalid: :replace, replace: 'X')
    Sequence.new(*line.chomp.split("\t"))
  end
  extend(IO) && write if in_file
end
validate() click to toggle source
# File lib/sequenceserver/sequence.rb, line 222
# Checks that database_ids is a non-empty Array whose entries are all found
# among Database.ids.
#
# Returns true when the check passes.
# Raises DatabaseUnreachableError, listing the known ids, otherwise.
def validate
  known_ids = Database.ids
  requested = database_ids

  # The intersection only keeps requested ids that are known, so equal
  # lengths mean nothing was dropped.
  acceptable = requested.is_a?(Array) &&
               !requested.empty? &&
               (known_ids & requested).length == requested.length
  return true if acceptable

  raise DatabaseUnreachableError,
        "Database id should be one of: #{known_ids.join("\n")}"
end