class DwcaHunter::ResourceArctos

Public Class Methods

new(opts = {}) click to toggle source
Calls superclass method DwcaHunter::Resource::new
# File lib/dwca_hunter/resources/arctos.rb, line 5
# Configures the Arctos resource (command name, title, source URL, identifier,
# download location and empty accumulators) and then calls the superclass
# initializer with +opts+.
#
# @param opts [Hash] options passed through unchanged to the superclass
def initialize(opts = {})
  @command = "arctos"
  @title = "Arctos"
  @url = "http://arctos.database.museum/cache/gn_merge.tgz"
  # generate_dwca reads @uuid for the EML id, so the identifier must live
  # under that name. @UUID is kept as an alias in case code outside this
  # view still reads the upper-case variable.
  @uuid = "eea8315d-a244-4625-859a-226675622312"
  @UUID = @uuid
  @download_path = File.join(Dir.tmpdir,
                             "dwca_hunter",
                             "arctos",
                             "data.tar.gz")
  # Output rows accumulated by collect_names / update_* and consumed by
  # generate_dwca.
  @synonyms = []
  @names = []
  @vernaculars = []
  @extensions = []
  # Lookup tables keyed by scientific name, filled by collect_synonyms and
  # collect_vernaculars.
  @synonyms_hash = {}
  @vernaculars_hash = {}
  super(opts)
end

Public Instance Methods

download() click to toggle source
# File lib/dwca_hunter/resources/arctos.rb, line 23
# Announces the download on stdout, then runs curl in silent mode to fetch
# @url into @download_path. Returns whatever the shell command printed to
# stdout.
def download
  puts("Downloading Arctos file.")
  %x(curl -s #{@url} -o #{@download_path})
end
make_dwca() click to toggle source
# File lib/dwca_hunter/resources/arctos.rb, line 32
# Logs that extraction is starting, loads the data via get_names, and then
# produces the archive via generate_dwca.
def make_dwca
  DwcaHunter.logger_write(object_id, "Extracting data")
  get_names
  generate_dwca
end
unpack() click to toggle source
# File lib/dwca_hunter/resources/arctos.rb, line 28
# Unpacks the downloaded archive by delegating to unpack_tar (defined outside
# this view).
def unpack
  unpack_tar
end

Private Instance Methods

collect_names() click to toggle source
# File lib/dwca_hunter/resources/arctos.rb, line 81
# Reads globalnames_classification.csv from @download_dir and turns it into
# one record per scientific name.
#
# The CSV holds one row per (scientific_name, term_type, term) triple. Rows
# are first grouped by scientific name into a term_type => term hash (rows
# without a term_type are skipped; a repeated term_type keeps the last value).
# Each group then becomes a hash appended to @names, with a sequential
# "gn_<n>" taxon id, and its vernaculars/synonyms are attached through
# update_vernacular and update_synonym.
#
# The file is opened with the block form of CSV.open so that the handle is
# closed as soon as reading finishes, even if a row raises.
def collect_names
  @names_index = {}
  path = File.join(@download_dir, "globalnames_classification.csv")

  names = {}
  CSV.open(path, headers: true) do |file|
    file.each_with_index do |row, i|
      next if row["term_type"].nil?

      name = row["scientific_name"]
      (names[name] ||= {})[row["term_type"].to_sym] = row["term"]
      puts "Preprocessed #{i} rows" if (i % 100_000).zero?
    end
  end

  names.each_with_index do |(canonical, terms), i|
    taxon_id = "gn_#{i + 1}"
    @names << { taxon_id: taxon_id,
                name_string: canonical,
                kingdom: terms[:kingdom],
                phylum: terms[:phylum],
                klass: terms[:class],
                order: terms[:order],
                family: terms[:family],
                genus: terms[:genus],
                species: terms[:species],
                authors: terms[:author_text],
                code: terms[:nomenclatural_code] }
    update_vernacular(taxon_id, canonical)
    update_synonym(taxon_id, canonical)
    puts "Processed #{i} names" if (i % 100_000).zero?
  end
end
collect_synonyms() click to toggle source
# File lib/dwca_hunter/resources/arctos.rb, line 64
# Reads globalnames_relationships.csv from @download_dir and fills
# @synonyms_hash: scientific_name => array of
# { name_string: related_name, status: taxon_relationship } hashes, in file
# order.
#
# The file is opened with the block form of CSV.open so that the handle is
# closed as soon as reading finishes, even if a row raises.
def collect_synonyms
  path = File.join(@download_dir, "globalnames_relationships.csv")
  CSV.open(path, headers: true) do |file|
    file.each_with_index do |row, i|
      canonical = row["scientific_name"]
      (@synonyms_hash[canonical] ||= []) <<
        { name_string: row["related_name"], status: row["taxon_relationship"] }
      puts "Processed #{i} synonyms" if (i % 100_000).zero?
    end
  end
end
collect_vernaculars() click to toggle source
# File lib/dwca_hunter/resources/arctos.rb, line 47
# Reads globalnames_commonname.csv from @download_dir and fills
# @vernaculars_hash: scientific_name => array of common_name values, in file
# order.
#
# The file is opened with the block form of CSV.open so that the handle is
# closed as soon as reading finishes, even if a row raises.
def collect_vernaculars
  path = File.join(@download_dir, "globalnames_commonname.csv")
  CSV.open(path, headers: true) do |file|
    file.each_with_index do |row, i|
      canonical = row["scientific_name"]
      (@vernaculars_hash[canonical] ||= []) << row["common_name"]
      puts "Processed #{i} vernaculars" if (i % 100_000).zero?
    end
  end
end
generate_dwca() click to toggle source
Calls superclass method DwcaHunter::Resource#generate_dwca
# File lib/dwca_hunter/resources/arctos.rb, line 137
# Assembles the in-memory pieces of the DarwinCore Archive and then calls the
# superclass implementation.
#
# Populates:
# * @core       - a header row of term URIs followed by one row per @names entry
# * @extensions - appends a vernacular-names table and a synonyms table, each
#                 with its own header row
# * @eml        - metadata hash (id, title, authors, providers, abstract, url)
def generate_dwca
  DwcaHunter.logger_write(object_id,
                          "Creating DarwinCore Archive file")
  @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
            "http://rs.tdwg.org/dwc/terms/scientificName",
            "http://rs.tdwg.org/dwc/terms/kingdom",
            "http://rs.tdwg.org/dwc/terms/phylum",
            "http://rs.tdwg.org/dwc/terms/class",
            "http://rs.tdwg.org/dwc/terms/order",
            "http://rs.tdwg.org/dwc/terms/family",
            "http://rs.tdwg.org/dwc/terms/genus",
            "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
  @names.each do |n|
    @core << [n[:taxon_id], n[:name_string],
              n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
              n[:genus], n[:code]]
  end

  vernacular_ext = {
    data: [[
      "http://rs.tdwg.org/dwc/terms/taxonID",
      "http://rs.tdwg.org/dwc/terms/vernacularName"
    ]],
    file_name: "vernacular_names.txt",
    row_type: "http://rs.gbif.org/terms/1.0/VernacularName"
  }
  @vernaculars.each do |v|
    vernacular_ext[:data] << [v[:taxon_id], v[:vern]]
  end
  @extensions << vernacular_ext

  synonym_ext = {
    data: [[
      "http://rs.tdwg.org/dwc/terms/taxonID",
      "http://rs.tdwg.org/dwc/terms/scientificName",
      "http://rs.tdwg.org/dwc/terms/taxonomicStatus"
    ]],
    file_name: "synonyms.txt"
  }
  @synonyms.each do |s|
    synonym_ext[:data] << [s[:taxon_id], s[:name_string], s[:status]]
  end
  @extensions << synonym_ext

  @eml = {
    # The initializer in this file stores the identifier in @UUID, so a bare
    # @uuid would be nil; fall back to @UUID when @uuid has not been set.
    id: @uuid || @UUID,
    title: @title,
    authors: [
      { email: "dustymc at gmail dot com" }
    ],
    metadata_providers: [
      { first_name: "Dmitry",
        last_name: "Mozzherin",
        email: "dmozzherin@gmail.com" }
    ],
    abstract: "Arctos is an ongoing effort to integrate access to specimen data, collection-management tools, and external resources on the internet.",
    url: @url
  }
  super
end
get_names() click to toggle source
# File lib/dwca_hunter/resources/arctos.rb, line 40
# Changes the process working directory to @download_dir and then loads the
# three CSV inputs.
#
# Order matters: collect_names calls update_synonym and update_vernacular,
# which read @synonyms_hash and @vernaculars_hash, so those lookups are
# filled first.
def get_names
  # NOTE(review): the collect_* methods build their paths with
  # File.join(@download_dir, ...), so this chdir looks redundant when
  # @download_dir is absolute and would misresolve those paths when it is
  # relative - confirm whether code outside this view relies on the cwd.
  Dir.chdir(@download_dir)
  collect_synonyms
  collect_vernaculars
  collect_names
end
update_synonym(taxon_id, canonical) click to toggle source
# File lib/dwca_hunter/resources/arctos.rb, line 128
# Appends to @synonyms one record per synonym registered for +canonical+ in
# @synonyms_hash, tagging each with +taxon_id+. Does nothing (and returns nil)
# when the name has no entry.
#
# @param taxon_id [String] identifier assigned to the accepted name
# @param canonical [String] scientific name used as the lookup key
def update_synonym(taxon_id, canonical)
  if @synonyms_hash.key?(canonical)
    @synonyms_hash[canonical].each do |entry|
      record = {
        taxon_id: taxon_id,
        name_string: entry[:name_string],
        status: entry[:status]
      }
      @synonyms.push(record)
    end
  end
end
update_vernacular(taxon_id, canonical) click to toggle source
# File lib/dwca_hunter/resources/arctos.rb, line 120
# Appends to @vernaculars one { taxon_id:, vern: } record per common name
# registered for +canonical+ in @vernaculars_hash. Does nothing (and returns
# nil) when the name has no entry.
#
# @param taxon_id [String] identifier assigned to the scientific name
# @param canonical [String] scientific name used as the lookup key
def update_vernacular(taxon_id, canonical)
  if @vernaculars_hash.key?(canonical)
    @vernaculars_hash[canonical].each do |common_name|
      record = { taxon_id: taxon_id, vern: common_name }
      @vernaculars.push(record)
    end
  end
end