class DwcaHunter::ResourceION

Public Class Methods

new(opts = {}) click to toggle source
Calls superclass method DwcaHunter::Resource::new
# File lib/dwca_hunter/resources/ion.rb, line 5
# Configures the ION (Index to Organism Names) resource: command name,
# title, download URL, UUID and local download path, then passes +opts+
# on to the superclass constructor.
#
# @param opts [Hash] options forwarded unchanged to the superclass
def initialize(opts = {})
  @command = "ion"
  @title = "Index to Organism Names"
  @url = "https://uofi.box.com/shared/static/tklh8i6q2kb33g6ki33k6s3is06lo9np.gz"
  # generate_dwca reads @uuid (lowercase). Instance variable names are
  # case-sensitive, so assigning only @UUID never populated that value.
  @uuid = "1137dfa3-5b8c-487d-b497-dc0938605864"
  # Old spelling kept so anything still reading @UUID sees the same value.
  @UUID = @uuid
  @download_path = File.join(Dir.tmpdir,
                             "dwca_hunter",
                             "ion",
                             "data.tar.gz")
  @names = []
  @extensions = []
  super(opts)
end

Public Instance Methods

download() click to toggle source
# File lib/dwca_hunter/resources/ion.rb, line 19
# Downloads the cached ION archive from @url to @download_path with curl.
#
# @return [String] whatever curl printed to stdout
def download
  puts "Downloading cached version of the file. Ask Rod Page to make new."
  # The command is given as an argument array, so no shell parses it:
  # a URL or path containing spaces or shell metacharacters is passed to
  # curl verbatim instead of being split or interpreted.
  IO.popen(["curl", "-s", "-L", @url, "-o", @download_path], &:read)
end
make_dwca() click to toggle source
# File lib/dwca_hunter/resources/ion.rb, line 28
# Builds the archive for this resource: logs the start of extraction,
# loads the names via +get_names+, then returns whatever +generate_dwca+
# returns.
def make_dwca
  progress_note = "Extracting data"
  DwcaHunter.logger_write(object_id, progress_note)

  get_names
  generate_dwca
end
unpack() click to toggle source
# File lib/dwca_hunter/resources/ion.rb, line 24
# Unpacks the downloaded archive. This is a plain delegation to
# +unpack_tar+, which is defined outside this view; its result is returned.
def unpack
  unpack_tar
end

Private Instance Methods

collect_names() click to toggle source
# File lib/dwca_hunter/resources/ion.rb, line 41
# Reads "ion.tsv" from @download_dir and appends one hash per data row to
# @names, with keys :taxon_id, :name_string and :auth taken from the
# "id", "nameComplete" and "taxonAuthor" columns.
#
# Prints a progress line for every 10,000th row index (including index 0).
def collect_names
  tsv_path = File.join(@download_dir, "ion.tsv")
  # quote_char is a Cyrillic letter rather than '"', so double quotes in the
  # data are read as ordinary characters (presumably the letter never occurs
  # in the file -- verify against the data).
  # The block form closes the file when reading ends or an error is raised;
  # the earlier block-less CSV.open left the handle open.
  CSV.open(tsv_path, headers: true, col_sep: "\t", quote_char: "щ") do |csv|
    csv.each_with_index do |row, i|
      @names << { taxon_id: row["id"],
                  name_string: row["nameComplete"],
                  auth: row["taxonAuthor"] }

      puts "Processed %s names" % i if (i % 10_000).zero?
    end
  end
end
generate_dwca() click to toggle source
Calls superclass method DwcaHunter::Resource#generate_dwca
# File lib/dwca_hunter/resources/ion.rb, line 57
# Prepares the data for the DarwinCore Archive and then calls the
# superclass +generate_dwca+:
#
# * @core becomes a header row of three Darwin Core term URIs followed by
#   one [taxon_id, name_string, auth] row per entry in @names;
# * @eml becomes the metadata hash (id, title, authors, metadata
#   providers, abstract, url).
def generate_dwca
  DwcaHunter.logger_write(object_id,
                          "Creating DarwinCore Archive file")

  core_header = ["http://rs.tdwg.org/dwc/terms/taxonID",
                 "http://rs.tdwg.org/dwc/terms/scientificName",
                 "http://rs.tdwg.org/dwc/terms/scientificNameAuthorship"]
  @core = [core_header]
  @names.each do |entry|
    @core.push([entry[:taxon_id], entry[:name_string], entry[:auth]])
  end

  creator = { first_name: "Nigel",
              last_name: "Robinson",
              email: "nigel.robinson@thomsonreuters.com" }
  provider = { first_name: "Dmitry",
               last_name: "Mozzherin",
               email: "dmozzherin@gmail.com" }

  # NOTE(review): "approximately ,000" looks like a number that lost its
  # leading digits; the intended figure is not visible here, so the text is
  # kept exactly as it was -- confirm against the upstream description.
  description =
    "ION contains millions of animal names, both fossil and " \
    "recent, at all taxonomic ranks, reported from the scientific " \
    "literature. (Bacteria, plant and virus names will be added soon)." \
    "\n\n" \
    "These names are derived from premier Clarivate databases: " \
    "Zoological Record®, BIOSIS Previews®, and Biological Abstracts®. " \
    "All names are tied to at least one published article. Together, " \
    "these resources cover every aspect of the life sciences - " \
    "providing names from over 30 million scientific records, " \
    "including approximately ,000 international journals, patents, " \
    "books, and conference proceedings. They provide a powerful " \
    "foundation for the most complete collection of organism names " \
    "available today."

  @eml = {
    id: @uuid,
    title: @title,
    authors: [creator],
    metadata_providers: [provider],
    abstract: description,
    url: @url
  }
  super
end
get_names() click to toggle source
# File lib/dwca_hunter/resources/ion.rb, line 36
# Switches the process working directory to @download_dir and then runs
# +collect_names+, returning its result.
def get_names
  data_dir = @download_dir
  Dir.chdir(data_dir)
  collect_names
end