class DwcaHunter::ResourceMCZ

Public Class Methods

new(opts = {})
Calls superclass method DwcaHunter::Resource::new
# File lib/dwca_hunter/resources/mcz.rb, line 5
# Configures the MCZbase resource: command name, title, source URL, dataset
# UUID, local download path and the empty collections that are filled while
# the data is processed, then forwards +opts+ to the superclass initializer.
#
# @param opts [Hash] options passed unchanged to the superclass
def initialize(opts = {})
  @command = "mcz"
  @title = "MCZbase"
  @url = "https://uofi.box.com/shared/static/x1dp86l48hyjkwfl106ejj25ormkzwip.gz"
  # Must be @uuid (lower case): generate_dwca reads @uuid for the EML id and
  # instance variable names are case-sensitive, so @UUID was never picked up.
  @uuid = "c79d055b-211b-40de-8e27-618011656265"
  @download_path = File.join(Dir.tmpdir,
                             "dwca_hunter",
                             "mcz",
                             "data.tar.gz")
  @synonyms = []
  @names = []
  @vernaculars = []
  @extensions = []
  @synonyms_hash = {}
  @vernaculars_hash = {}
  super(opts)
end

Public Instance Methods

download()
# File lib/dwca_hunter/resources/mcz.rb, line 23
# Downloads the cached MCZ archive from @url to @download_path using curl.
#
# curl is started from an argument list, so no shell is involved: a URL or
# download path containing spaces or shell metacharacters is passed through
# as a single argument instead of being word-split or interpreted.
#
# @return [String] curl's standard output (normally empty because of -s/-o),
#   the same value the previous backtick invocation returned
def download
  puts "Downloading cached verion of the file. Ask MCZ for update."
  IO.popen(["curl", "-s", "-L", @url, "-o", @download_path], &:read)
end
make_dwca()
# File lib/dwca_hunter/resources/mcz.rb, line 32
# Builds the archive for this resource: logs the start of extraction, loads
# the names via get_names, then hands over to generate_dwca.
def make_dwca
  DwcaHunter.logger_write(object_id, "Extracting data")
  get_names
  generate_dwca
end
unpack()
# File lib/dwca_hunter/resources/mcz.rb, line 28
# Unpacks the downloaded file by delegating to unpack_tar, which is not
# defined in this class's visible source (presumably inherited — confirm).
def unpack
  unpack_tar
end

Private Instance Methods

collect_names()
# File lib/dwca_hunter/resources/mcz.rb, line 45
# Reads the MCZ taxonomy export CSV from @download_dir and appends one hash
# per row to @names. Each hash carries a generated taxon id ("gn_<row number>",
# 1-based), the name string (scientific name followed by the author text,
# stripped of surrounding whitespace), the higher classification columns and
# the nomenclatural code.
#
# The CSV is opened with the block form so the file handle is closed as soon
# as reading finishes, including when a row raises.
def collect_names
  @names_index = {}
  csv_path = File.join(@download_dir, "taxonomy_export_2021Feb2.csv")
  CSV.open(csv_path, headers: true) do |file|
    file.each_with_index do |row, i|
      canonical = row["SCIENTIFIC_NAME"]
      authors = row["AUTHOR_TEXT"]

      @names << { taxon_id: "gn_#{i + 1}",
                  # strip drops the separator space when either part is missing
                  name_string: "#{canonical} #{authors}".strip,
                  kingdom: row["KINGDOM"],
                  phylum: row["PHYLUM"],
                  klass: row["PHYLCLASS"],
                  order: row["PHYLORDER"],
                  family: row["FAMILY"],
                  genus: row["GENUS"],
                  code: row["NOMENCLATURAL_CODE"] }
      # Progress message uses the zero-based row index, every 10,000 rows
      puts "Processed %s names" % i if i % 10_000 == 0
    end
  end
end
generate_dwca()
Calls superclass method DwcaHunter::Resource#generate_dwca
# File lib/dwca_hunter/resources/mcz.rb, line 75
# Prepares the data for the DarwinCore Archive and delegates the actual
# generation to the superclass.
#
# @core becomes a table whose first row holds the Darwin Core term URIs and
# whose remaining rows hold the matching values of every entry in @names.
# @eml becomes the metadata hash (id, title, authors, metadata providers,
# abstract, url).
def generate_dwca
  DwcaHunter.logger_write(object_id, "Creating DarwinCore Archive file")

  header = ["http://rs.tdwg.org/dwc/terms/taxonID",
            "http://rs.tdwg.org/dwc/terms/scientificName",
            "http://rs.tdwg.org/dwc/terms/kingdom",
            "http://rs.tdwg.org/dwc/terms/phylum",
            "http://rs.tdwg.org/dwc/terms/class",
            "http://rs.tdwg.org/dwc/terms/order",
            "http://rs.tdwg.org/dwc/terms/family",
            "http://rs.tdwg.org/dwc/terms/genus",
            "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]
  # Field order mirrors the header columns above
  rows = @names.map do |name|
    name.values_at(:taxon_id, :name_string, :kingdom, :phylum, :klass,
                   :order, :family, :genus, :code)
  end
  @core = [header, *rows]

  abstract = "The Museum of Comparative Zoology was founded in 1859 on " \
             "the concept that collections are an integral and fundamental " \
             "component of zoological research and teaching. This more than " \
             "150-year-old commitment remains a strong and proud tradition for " \
             "the MCZ. The present-day MCZ contains over 21-million specimens in " \
             "ten research collections which comprise one of the world's richest " \
             "and most varied resources for studying the diversity of life. The " \
             "museum serves as the primary repository for zoological specimens " \
             "collected by past and present Harvard faculty-curators, staff and " \
             "associates conducting research around the world. As a premier " \
             "university museum and research institution, the specimens and " \
             "their related data are available to researchers of the scientific " \
             "and museum community. doi:10.5281/zenodo.891420"
  authors = [{ first_name: "MCZ", last_name: "Harvard University" }]
  providers = [{ first_name: "Paul", last_name: "Morris" }]

  @eml = { id: @uuid,
           title: @title,
           authors: authors,
           metadata_providers: providers,
           abstract: abstract,
           url: @url }
  super
end
get_names()
# File lib/dwca_hunter/resources/mcz.rb, line 40
# Switches the process working directory to @download_dir and then loads the
# names with collect_names.
#
# NOTE(review): Dir.chdir is called without a block, so the working directory
# stays changed after this method returns — confirm whether later steps rely
# on that before altering it.
def get_names
  Dir.chdir(@download_dir)
  collect_names
end