class DwcaHunter::ResourceSherborn

Public Class Methods

new(opts = {}) click to toggle source
Calls superclass method DwcaHunter::Resource::new
# File lib/dwca_hunter/resources/sherborn.rb, line 5
def initialize(opts = {})
  @command = "sherborn"
  @title = "Index Animalium"
  @url = "https://uofi.box.com/shared/static/kj8a26a3bcrraa4kccoyz5jr5uqrqoe6.csv"
  @UUID = "05ad6ca2-fc37-47f4-983a-72e535420e28"
  @download_path = File.join(Dir.tmpdir,
                             "dwca_hunter",
                             "sherborn",
                             "data.csv")
  @synonyms = []
  @names = []
  @vernaculars = []
  @extensions = []
  @synonyms_hash = {}
  @vernaculars_hash = {}
  super(opts)
end

Public Instance Methods

download() click to toggle source
# File lib/dwca_hunter/resources/sherborn.rb, line 23
def download
  puts "Downloading."
  `curl -s -L #{@url} -o #{@download_path}`
end
make_dwca() click to toggle source
# File lib/dwca_hunter/resources/sherborn.rb, line 30
def make_dwca
  DwcaHunter.logger_write(object_id, "Extracting data")
  get_names
  generate_dwca
end
unpack() click to toggle source
# File lib/dwca_hunter/resources/sherborn.rb, line 28
def unpack; end

Private Instance Methods

collect_names() click to toggle source
# File lib/dwca_hunter/resources/sherborn.rb, line 43
def collect_names
  dupes = {}
  @names_index = {}
  file = CSV.open(File.join(@download_dir, "data.csv"),
                  headers: false, col_sep: "\t")
  file.each_with_index do |row, i|
    next if dupes.key?(row[1])

    dupes[row[1]] = true
    taxon_id = row[0]
    name_string = row[1]

    @names << { taxon_id: taxon_id,
                name_string: name_string }
    puts "Processed %s names" % i if i % 10_000 == 0
  end
end
generate_dwca() click to toggle source
Calls superclass method DwcaHunter::Resource#generate_dwca
# File lib/dwca_hunter/resources/sherborn.rb, line 61
def generate_dwca
  DwcaHunter.logger_write(object_id,
                          "Creating DarwinCore Archive file")
  @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
            "http://rs.tdwg.org/dwc/terms/scientificName",
            "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
  @names.each do |n|
    @core << [n[:taxon_id], n[:name_string], "ICZN"]
  end

  @eml = {
    id: @uuid,
    title: @title,
    authors: [
      { first_name: "Charles Davies",
        last_name: "Sherborn" }
    ],
    metadata_providers: [
      { first_name: "Dmitry",
        last_name: "Mozzherin",
        email: "dmozzherin@gmail.com" }
    ],
    abstract: "Index Animalium is a monumental work that covers " \
              "400 000 zoological names registered by science " \
              "between 1758 and 1850",
    url: @url
  }
  super
end
get_names() click to toggle source
# File lib/dwca_hunter/resources/sherborn.rb, line 38
def get_names
  Dir.chdir(@download_dir)
  collect_names
end