class SequenceServer::Sequence::Retriever
Retrieve sequences from BLAST
databases.
Attributes
database_ids[R]
in_file[R]
sequence_ids[R]
sequences[R]
Public Class Methods
new(sequence_ids, database_ids, in_file = false)
click to toggle source
# File lib/sequenceserver/sequence.rb, line 178
#
# Stores the requested ids and the +in_file+ flag, then checks the database
# ids with +validate+ and, when that check returns a truthy value, performs
# the retrieval with +run+.
#
# sequence_ids:: one id or a list of ids; coerced with Kernel#Array.
# database_ids:: one id or a list of ids; coerced with Kernel#Array.
# in_file::      stored as given; defaults to false.
def initialize(sequence_ids, database_ids, in_file = false)
  @sequence_ids = Array(sequence_ids)
  @database_ids = Array(database_ids)
  @in_file      = in_file

  run if validate
end
Public Instance Methods
to_json(*_args)
click to toggle source
# File lib/sequenceserver/sequence.rb, line 188
#
# Serialises the retrieval outcome to JSON. The document has two keys:
# +error_msgs+ (the result of +error_msgs+) and +sequences+ (the +info+ of
# every retrieved sequence). Any arguments passed in are ignored.
def to_json(*_args)
  payload = {
    error_msgs: error_msgs,
    sequences: sequences.map { |sequence| sequence.info }
  }
  payload.to_json
end
Private Instance Methods
database_names()
click to toggle source
# File lib/sequenceserver/sequence.rb, line 214
#
# Looks up the databases selected by +database_ids+ and returns the +name+
# of each one, in lookup order.
def database_names
  Database[database_ids].map { |database| database.name }
end
database_titles()
click to toggle source
# File lib/sequenceserver/sequence.rb, line 218
#
# Looks up the databases selected by +database_ids+ and returns the +title+
# of each one, in lookup order.
def database_titles
  Database[database_ids].map { |database| database.title }
end
error_msgs()
click to toggle source
rubocop:disable Metrics/MethodLength
# Reports a mismatch between the number of sequences requested and found.
#
# Returns an empty array when +sequences+ and +sequence_ids+ have the same
# length. Otherwise returns a one-element array holding a [title, message]
# pair. The message is a squiggly heredoc that interpolates the number of
# requested ids, the ids themselves, the database titles and the number of
# sequences actually found.
#
# NOTE(review): the first "likely" in the user-facing message is spelled
# "likley". It is runtime text, so it is left untouched here.
# NOTE(review): the heredoc is shown collapsed onto one line in this listing;
# its original line breaks cannot be recovered from here.
# File lib/sequenceserver/sequence.rb, line 232 def error_msgs return [] if sequences.length == sequence_ids.length [ ['ERROR: incorrect number of sequences found.', <<~MSG You requested #{sequence_ids.length} sequence(s) with the following identifiers: #{sequence_ids.join(', ')} from the following databases: #{database_titles.join(', ')} but we found #{sequences.length} sequence(s). This is likley due to a problem with how databases are formatted. Please share this text with the person managing this website. If you are the admin and are confident that your databases are correctly formatted, you have likely encountered a weird bug. In this case, please raise an issue at: https://github.com/wurmlab/sequenceserver/issues If any sequences were retrieved, you can find them below (but some may be incorrect, so be careful!) MSG ] ] end
run()
click to toggle source
# Fetches the requested sequences by running blastdbcmd.
#
# Builds a command string whose -db argument is +database_names+ joined by
# spaces and whose -entry argument is +sequence_ids+ joined by commas, then
# executes it via +sys+ with +path+ set to config[:bin]. Each line of the
# output is re-encoded as UTF-8 (invalid bytes become 'X'), chomped, split,
# and splatted into Sequence.new; the resulting list is stored in
# @sequences. When +in_file+ is truthy the object is extended with +IO+ and
# +write+ is called.
#
# NOTE(review): database names and sequence ids are interpolated into
# single-quoted shell arguments without escaping. Confirm callers sanitise
# them, or escape them before building the command.
# NOTE(review): the separator inside -outfmt and the argument to +split+ show
# as a single whitespace character in this listing. Confirm against the real
# file whether it is a tab.
# File lib/sequenceserver/sequence.rb, line 197 def run command = "blastdbcmd -outfmt '%g %i %a %t %s'" \ " -db '#{database_names.join(' ')}'" \ " -entry '#{sequence_ids.join(',')}'" out, = sys(command, path: config[:bin]) @sequences = out.each_line.map do |line| # Stop codons in amino acid sequence databases show up as invalid # UTF-8 characters in the output and cause the subsequent call to # `split` to fail. We replace invalid UTF-8 characters with X. line = line.encode('UTF-8', invalid: :replace, replace: 'X') Sequence.new(*line.chomp.split(' ')) end extend(IO) && write if in_file end
validate()
click to toggle source
# File lib/sequenceserver/sequence.rb, line 222
#
# Checks the requested database ids against Database.ids.
#
# Returns true when +database_ids+ is a non-empty Array and its intersection
# with Database.ids has the same length as +database_ids+ itself.
#
# Raises DatabaseUnreachableError, listing the known ids one per line, in
# every other case.
def validate
  known_ids = Database.ids
  requested = database_ids

  recognised = requested.is_a?(Array) && !requested.empty? &&
               (known_ids & requested).length == requested.length
  return true if recognised

  raise DatabaseUnreachableError, 'Database id should be one of:' \
                                  " #{known_ids.join("\n")}"
end