class SequenceServer::Sequence::Retriever

Retrieve sequences from BLAST databases.

Attributes

database_ids[R]
in_file[R]
sequence_ids[R]
sequences[R]

Public Class Methods

new(sequence_ids, database_ids, in_file = false) click to toggle source
# File lib/sequenceserver/sequence.rb, line 178
# Build a retriever and immediately attempt the retrieval.
#
# sequence_ids and database_ids may each be a single value or a list; both
# are coerced with Array(). in_file is stored as given and read later by
# `run`.
#
# The three steps run in order -- validate, create_entry_batch_file, run --
# and a step only runs if the previous one returned a truthy value.
def initialize(sequence_ids, database_ids, in_file = false)
  @sequence_ids = Array(sequence_ids)
  @database_ids = Array(database_ids)
  @in_file      = in_file

  return unless validate
  return unless create_entry_batch_file

  run
end

Public Instance Methods

to_json(*_args) click to toggle source
# File lib/sequenceserver/sequence.rb, line 188
# Serialise the retrieval result to a JSON string.
#
# The JSON object has two keys, in this order: `error_msgs` (the value of
# error_msgs) and `sequences` (the `info` of every retrieved sequence).
# Any arguments passed are accepted and ignored.
def to_json(*_args)
  payload = {}
  payload[:error_msgs] = error_msgs
  payload[:sequences]  = sequences.map { |sequence| sequence.info }
  payload.to_json
end

Private Instance Methods

create_entry_batch_file() click to toggle source

Create a temporary file containing sequence ids to fetch.

# File lib/sequenceserver/sequence.rb, line 251
# Write the requested sequence ids, one per line, to a fresh temporary file
# and remember it in @batch_file.
#
# The data is flushed so it is on disk for whoever reads the file by path;
# the file itself is left open. Returns the Tempfile.
def create_entry_batch_file
  batch = Tempfile.new("#{Time.now}_batch")
  batch.write(sequence_ids.join("\n"))
  batch.flush
  @batch_file = batch
end
database_names() click to toggle source
# File lib/sequenceserver/sequence.rb, line 215
# Return the `name` of every entry that Database[] yields for database_ids.
def database_names
  selected = Database[database_ids]
  selected.map { |database| database.name }
end
database_titles() click to toggle source
# File lib/sequenceserver/sequence.rb, line 219
# Return the `title` of every entry that Database[] yields for database_ids.
def database_titles
  selected = Database[database_ids]
  selected.map { |database| database.title }
end
error_msgs() click to toggle source

rubocop:disable Metrics/MethodLength

# File lib/sequenceserver/sequence.rb, line 259
      # Describe a mismatch between the number of sequences requested and the
      # number actually retrieved.
      #
      # Returns an empty Array when as many sequences were found as ids were
      # requested. Otherwise returns an Array holding a single
      # [headline, details] pair of Strings; the details list the requested
      # ids, the titles of the databases searched and the number of sequences
      # found.
      def error_msgs
        return [] if sequences.length == sequence_ids.length

        [
          ['ERROR: incorrect number of sequences found.',
           <<~MSG
             You requested #{sequence_ids.length} sequence(s) with the following
             identifiers:
               #{sequence_ids.join(', ')}
             from the following databases:
               #{database_titles.join(', ')}
             but we found #{sequences.length} sequence(s).

             This is likely due to a problem with how databases are formatted.
             Please share this text with the person managing this website.

             If you are the admin and are confident that your databases are
             correctly formatted, you have likely encountered a weird bug.
             In this case, please raise an issue at:
             https://github.com/wurmlab/sequenceserver/issues

             If any sequences were retrieved, you can find them below
             (but some may be incorrect, so be careful!)
           MSG
          ]
        ]
      end
run() click to toggle source
# File lib/sequenceserver/sequence.rb, line 197
# Fetch the requested sequences by running `blastdbcmd` and parse its output
# into Sequence objects, stored in @sequences.
#
# Every output line is expected to hold the five -outfmt fields
# (%g, %i, %a, %t, %s) separated by tabs; the fields are passed positionally
# to Sequence.new. The first element of what `sys` returns is treated as the
# command's output.
#
# The entry batch file is closed and deleted whether or not the command and
# the parsing succeed. When in_file is truthy, the object is then extended
# with IO and `write` is called.
def run
  # The field separator is spelled "\t" both here and in the split below so
  # that the format requested from blastdbcmd and the parser cannot drift
  # apart.
  command = "blastdbcmd -outfmt '%g\t%i\t%a\t%t\t%s'" \
            " -db '#{database_names.join(' ')}'" \
            " -entry_batch '#{@batch_file.path}'"

  begin
    out, = sys(command, path: config[:bin])
    @sequences = out.each_line.map do |line|
      # Stop codons in amino acid sequence databases show up as invalid
      # UTF-8 characters in the output and cause the subsequent call to
      # `split` to fail. We replace invalid UTF-8 characters with X.
      line = line.encode('UTF-8', invalid: :replace, replace: 'X')
      Sequence.new(*line.chomp.split("\t"))
    end
  ensure
    # Release the descriptor and remove the file even when the command or
    # the parsing above raised, so failed requests leave nothing behind.
    @batch_file.close!
  end

  extend(IO) && write if in_file
end
validate() click to toggle source
# File lib/sequenceserver/sequence.rb, line 223
# Check the requested database ids and sequence ids before any work is done.
#
# Raises DatabaseUnreachableError unless database_ids is a non-empty Array
# whose intersection with Database.ids is as long as database_ids itself
# (so an unknown id, or a repeated id, fails the check).
# Raises InvalidSequenceIdError if any sequence id does not match
# SequenceServer::BLAST::VALID_SEQUENCE_ID.
# Returns true when both checks pass.
def validate
  known_ids = Database.ids
  databases_ok = database_ids.is_a?(Array) &&
                 !database_ids.empty? &&
                 (known_ids & database_ids).length == database_ids.length

  unless databases_ok
    raise DatabaseUnreachableError,
          "Database id should be one of: #{known_ids.join("\n")}"
  end

  _valid, invalid_sequence_ids = sequence_ids.partition do |id|
    id =~ SequenceServer::BLAST::VALID_SEQUENCE_ID
  end

  if invalid_sequence_ids.any?
    raise InvalidSequenceIdError,
          "Invalid sequence id(s): #{invalid_sequence_ids.join(', ')}"
  end

  true
end