class DwcaHunter::ResourceION
Public Class Methods
new(opts = {})
click to toggle source
Calls superclass method
DwcaHunter::Resource::new
# File lib/dwca_hunter/resources/ion.rb, line 5
#
# Describes the "Index to Organism Names" (ION) resource: its command name,
# title, download URL, identifier and local download path, then hands the
# options over to the superclass initializer.
#
# @param opts [Hash] options passed through unchanged to the superclass
def initialize(opts = {})
  @command = "ion"
  @title = "Index to Organism Names"
  @url = "https://uofi.box.com/shared/static/tklh8i6q2kb33g6ki33k6s3is06lo9np.gz"
  # Must be lowercase: generate_dwca reads @uuid for the EML id. The former
  # spelling (@UUID) is a different instance variable, which left the id nil.
  @uuid = "1137dfa3-5b8c-487d-b497-dc0938605864"
  @download_path = File.join(Dir.tmpdir, "dwca_hunter", "ion", "data.tar.gz")
  # Filled by collect_names with one hash per name row.
  @names = []
  @extensions = []
  super(opts)
end
Public Instance Methods
download()
click to toggle source
# File lib/dwca_hunter/resources/ion.rb, line 19
#
# Prints a notice and fetches @url into @download_path by shelling out to
# curl (-s: silent, -L: follow redirects). curl's exit status is not checked.
#
# @return [String] whatever curl wrote to standard output
def download
  puts "Downloading cached version of the file. Ask Rod Page to make new."
  # NOTE(review): @url and @download_path are interpolated into the shell
  # command unquoted; this works for the values assigned in initialize but
  # would break if either ever contained whitespace or shell metacharacters.
  `curl -s -L #{@url} -o #{@download_path}`
end
make_dwca()
click to toggle source
# File lib/dwca_hunter/resources/ion.rb, line 28
#
# Logs the start of extraction, loads the names via get_names and then
# builds the archive via generate_dwca.
def make_dwca
  progress_note = "Extracting data"
  DwcaHunter.logger_write(object_id, progress_note)

  get_names
  generate_dwca
end
unpack()
click to toggle source
# File lib/dwca_hunter/resources/ion.rb, line 24
#
# Unpacks the downloaded file by delegating to unpack_tar, which is defined
# outside this class body and returns this method's result.
def unpack
  unpack_tar()
end
Private Instance Methods
collect_names()
click to toggle source
# File lib/dwca_hunter/resources/ion.rb, line 41
#
# Reads "ion.tsv" from @download_dir and appends one hash per data row to
# @names, with the keys :taxon_id, :name_string and :auth taken from the
# "id", "nameComplete" and "taxonAuthor" columns. Prints a progress line
# for every 10,000th row index, starting with index 0.
#
# The file is opened with the block form of CSV.open so the handle is closed
# as soon as parsing finishes, even if a row raises.
def collect_names
  tsv_path = File.join(@download_dir, "ion.tsv")
  # quote_char is set to a character other than '"', so double quotes inside
  # the tab-separated fields are read as ordinary data.
  CSV.open(tsv_path, headers: true, col_sep: "\t", quote_char: "щ") do |csv|
    csv.each_with_index do |row, i|
      @names << {
        taxon_id: row["id"],
        name_string: row["nameComplete"],
        auth: row["taxonAuthor"]
      }
      puts "Processed %s names" % i if (i % 10_000).zero?
    end
  end
end
generate_dwca()
click to toggle source
Calls superclass method
DwcaHunter::Resource#generate_dwca
# File lib/dwca_hunter/resources/ion.rb, line 57
#
# Prepares the data for the archive and then calls the superclass
# implementation:
# * @core - a header row of three Darwin Core term URIs followed by one
#   [taxon_id, name_string, auth] row per entry in @names
# * @eml  - metadata hash (id, title, authors, metadata providers,
#   abstract, url)
def generate_dwca
  DwcaHunter.logger_write(object_id, "Creating DarwinCore Archive file")

  header = ["http://rs.tdwg.org/dwc/terms/taxonID",
            "http://rs.tdwg.org/dwc/terms/scientificName",
            "http://rs.tdwg.org/dwc/terms/scientificNameAuthorship"]
  rows = @names.map { |name| name.values_at(:taxon_id, :name_string, :auth) }
  @core = [header, *rows]

  author = { first_name: "Nigel",
             last_name: "Robinson",
             email: "nigel.robinson@thomsonreuters.com" }
  provider = { first_name: "Dmitry",
               last_name: "Mozzherin",
               email: "dmozzherin@gmail.com" }
  # NOTE(review): "approximately ,000" below looks like a truncated number;
  # it is reproduced as found - confirm the intended figure.
  abstract = "ION contains millions of animal names, both fossil and " \
             "recent, at all taxonomic ranks, reported from the scientific " \
             "literature. (Bacteria, plant and virus names will be added soon)." \
             "\n\n" \
             "These names are derived from premier Clarivate databases: " \
             "Zoological Record®, BIOSIS Previews®, and Biological Abstracts®. " \
             "All names are tied to at least one published article. Together, " \
             "these resources cover every aspect of the life sciences - " \
             "providing names from over 30 million scientific records, " \
             "including approximately ,000 international journals, patents, " \
             "books, and conference proceedings. They provide a powerful " \
             "foundation for the most complete collection of organism names " \
             "available today."

  @eml = {
    id: @uuid,
    title: @title,
    authors: [author],
    metadata_providers: [provider],
    abstract: abstract,
    url: @url
  }
  super
end
get_names()
click to toggle source
# File lib/dwca_hunter/resources/ion.rb, line 36
#
# Switches the process working directory to @download_dir and then loads
# the names with collect_names.
def get_names
  # Dir.chdir without a block: the working directory stays changed after
  # this method returns.
  working_dir = @download_dir
  Dir.chdir(working_dir)

  collect_names
end