class DwcaHunter::ResourceMammalSpecies

ResourceMammalSpecies converts “Mammal Species of the World” data to a DarwinCore Archive file.

Public Class Methods

new(opts = {}) click to toggle source
Calls superclass method DwcaHunter::Resource::new
# File lib/dwca_hunter/resources/mammal_species.rb, line 7
# Sets up the resource's identity, source location and working state, then
# hands +opts+ over to the superclass constructor.
def initialize(opts = {})
  # Identity of the resource.
  @command = "mammal-species"
  @title = "The Mammal Species of The World"
  @uuid = "464dafec-1037-432d-8449-c0b309e0a030"

  # Where the raw export is fetched from and where it is stored locally.
  @url = "http://www.departments.bucknell.edu"\
         "/biology/resources/msw3/export.asp"
  @download_path = File.join(Dir.tmpdir, "dwca_hunter", "mammalsp",
                             "msw3-all.csv")

  # Helpers used to validate synonym names.
  @parser = Biodiversity::Parser
  @black_sp = black_species

  # Working state; id 1 is given to the root clade "Mammalia".
  @data = []
  @extensions = []
  @count = 1
  @clades = { "Mammalia" => { rank: "class", id: @count } }

  super
end

Public Instance Methods

download() click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 35
# Logs a progress message and downloads the source file from +url+ into
# @download_path using DwcaHunter::Downloader.
def download
  message = "Downloading file -- "\
            "it will take some time..."
  DwcaHunter.logger_write(object_id, message)
  DwcaHunter::Downloader.new(url, @download_path).download
end
make_dwca() click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 28
# Runs the conversion pipeline: re-encode the downloaded file, load its rows,
# then build the archive. Returns whatever generate_dwca returns.
def make_dwca
  DwcaHunter.logger_write(object_id, "Extracting data")

  encode        # Latin-1 -> UTF-8 copy of the download
  collect_data  # fills @data from the re-encoded file
  generate_dwca
end
needs_unpack?() click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 24
# Always false for this resource.
def needs_unpack?
  false
end

Private Instance Methods

adjust_clade(rec, rank, clade) click to toggle source

rubocop:enable Metrics/AbcSize

# File lib/dwca_hunter/resources/mammal_species.rb, line 149
# Returns the name to use for +clade+ at the given +rank+.
#
# For :species and :subspecies the name is rebuilt from the record's genus,
# species and (for subspecies only) subspecies fields, with runs of
# whitespace collapsed to a single space. Any other rank is returned as is.
def adjust_clade(rec, rank, clade)
  return clade unless %i[species subspecies].include?(rank)

  parts = [rec[:genus], rec[:species]]
  parts.push(rec[:subspecies]) if rank == :subspecies
  parts.join(" ").gsub(/[\s]+/, " ").strip
end
black_species() click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 194
# Downloads the list of black-listed species epithets, one per line, and
# returns it as a Hash of { epithet => 1 } for quick membership checks.
#
# The previous version handed a block to URI.parse, which never yields, so
# the list was never fetched and the result was always empty. The URI is now
# actually opened through open-uri.
#
# If the list cannot be fetched, a warning is printed and an empty Hash is
# returned. That matches what callers received before the fix and keeps the
# constructor usable without network access.
def black_species
  require "open-uri" # loaded lazily; provides #open on HTTP(S) URIs

  url = "https://www.dropbox.com/s/jl7sc7whuidsu8w/species-black.txt?dl=1"
  res = {}
  URI.parse(url).open do |f|
    f.each_line { |l| res[l.strip] = 1 }
  end
  res
rescue StandardError => e
  warn "Could not load species black list: #{e.message}"
  {}
end
collect_data() click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 48
# Reads the UTF-8 copy of the downloaded CSV (@download_path + ".utf_8") and
# appends every row to @data as a Hash keyed by symbolized header names.
#
# CSV.foreach is used so that the file is closed once reading is finished.
# The options are passed as keywords because a positional Hash is taken as
# the file mode on Ruby 3.
def collect_data
  opts = { headers: true, header_converters: :symbol }
  CSV.foreach("#{@download_path}.utf_8", **opts) do |row|
    @data << row.to_hash
  end
end
core_init() click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 182
# Starts the core table (@core) with its header row of term URIs followed by
# the row for the root clade "Mammalia", whose accepted id is its own id.
def core_init
  header = [
    "http://rs.tdwg.org/dwc/terms/taxonID",
    "http://globalnames.org/terms/localID",
    "http://rs.tdwg.org/dwc/terms/parentNameUsageID",
    "http://rs.tdwg.org/dwc/terms/acceptedNameUsageID",
    "http://rs.tdwg.org/dwc/terms/scientificName",
    "http://rs.tdwg.org/dwc/terms/taxonomicStatus",
    "http://rs.tdwg.org/dwc/terms/taxonRank"
  ]
  root_id = @clades["Mammalia"][:id]
  @core = [header]
  @core.push([root_id, nil, nil, root_id, "Mammalia", nil, "class"])
end
eml_init() click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 159
# Builds the EML metadata Hash (@eml) for the archive from @uuid and @title
# plus fixed author, provider, abstract and url values.
def eml_init
  @eml = {
    id: @uuid,
    title: @title,
    authors: [
      { first_name: "Don",
        last_name: "Wilson" },
      # The surname is "Reeder", as spelled in the abstract below; it was
      # previously misspelled as "Reader".
      { first_name: "DeeAnn",
        last_name: "Reeder" }
    ],
    metadata_providers: [
      { first_name: "Dmitry",
        last_name: "Mozzherin",
        email: "dmozzherin@gmail.com" }
    ],
    abstract: "Mammal Species of the World, 3rd edition (MSW3) is "\
    "a database of mammalian taxonomy, based upon the 2005 book "\
    "Mammal Species of the World. A Taxonomic and Geographic Reference "\
    "(3rd ed). Don E. Wilson & DeeAnn M. Reeder (editors).",
    url: "http://www.vertebrates.si.edu/msw/mswcfapp/msw/index.cfm"
  }
end
encode() click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 44
# Passes the downloaded file to DwcaHunter::Encoding.latin1_to_utf8.
# collect_data later reads the result from @download_path + ".utf_8".
def encode
  source_path = @download_path
  DwcaHunter::Encoding.latin1_to_utf8(source_path)
end
extensions_init() click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 206
# Registers the vernacular-names extension in @extensions. Its data table
# starts with just the header row of term URIs.
def extensions_init
  header = [
    "http://rs.tdwg.org/dwc/terms/taxonID",
    "http://rs.tdwg.org/dwc/terms/vernacularName",
    "http://purl.org/dc/terms/language"
  ]
  vernaculars = {
    data: [header],
    file_name: "vernacular_names.txt",
    row_type: "http://rs.gbif.org/terms/1.0/VernacularName"
  }
  @extensions << vernaculars
end
generate_dwca() click to toggle source
Calls superclass method DwcaHunter::Resource#generate_dwca
# File lib/dwca_hunter/resources/mammal_species.rb, line 55
# Prepares the core table, the extensions and the EML metadata, feeds every
# collected record through the hierarchy / vernacular / synonym processors,
# and then lets the superclass implementation finish the job.
def generate_dwca
  DwcaHunter.logger_write(object_id, "Creating DarwinCore Archive file")

  core_init
  extensions_init
  eml_init

  @data.each do |record|
    core_row = process_hierarchy(record)
    process_vernaculars(record, core_row)
    process_synonyms(record, core_row)
  end

  super
end
process_hierarchy(rec) click to toggle source

rubocop:disable Metrics/AbcSize

# File lib/dwca_hunter/resources/mammal_species.rb, line 121
# Walks the record's classification from order down to subspecies, adding a
# core row for every clade that has not been seen before, and returns the
# core row that represents the record's taxon.
#
# When a new clade is created at (or below) the record's own rank, its name
# is completed with authorship via process_name and that new row is returned.
#
# When no such row is created (for example the taxon was already registered
# by an earlier record) the existing core row of the deepest matched clade is
# returned. Previously the method fell through and returned the list of rank
# symbols from +each+, so callers ended up using :order as a taxon id.
#
# rec - Hash of one CSV row keyed by symbolized header names.
def process_hierarchy(rec)
  parent_id = @clades["Mammalia"][:id]
  # to_s keeps a record without a taxon level from raising on nil.
  row_rank = rec[:taxonlevel].to_s.downcase.to_sym
  is_row_rank = false
  %i[order suborder infraorder superfamily family
     subfamily tribe genus subgenus species subspecies].each do |rank|
    # The flag is sticky: it stays set for all ranks below the row's rank.
    is_row_rank = true if rank == row_rank
    clade = rec[rank]
    # All-caps names (e.g. "PRIMATES") are normalized to "Primates".
    clade = clade.capitalize if clade =~ /^[A-Z]+$/
    next if clade.to_s == ""
    clade_id = nil
    clade = adjust_clade(rec, rank, clade)
    if @clades.key?(clade)
      clade_id = @clades[clade][:id]
    else
      @count += 1
      clade_id = @count
      @clades[clade] = { id: clade_id, rank: rank }
      @core << [clade_id, nil, parent_id, clade_id, clade, nil, rank.to_s]
      if is_row_rank
        process_name(rec)
        return @core.last
      end
    end
    parent_id = clade_id
  end
  # Nothing new was created for this record: hand back the row of the
  # deepest clade that matched. The header row never matches because its
  # first cell is a term URI, not an id.
  @core.find { |row| row[0] == parent_id }
end
process_name(rec) click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 112
# Completes the most recently added core row: appends the record's author and
# date to the scientific name (dropping simple markup tags and collapsing
# whitespace) and stores the record's id as the row's local id.
def process_name(rec)
  row = @core.last
  full_name = [row[4], rec[:author], rec[:date]].join(" ")
  without_tags = full_name.gsub(%r{<[/ibsup]+>}, "")
  row[4] = without_tags.gsub(/[\s]+/, " ").strip
  row[1] = rec[:id]
end
process_synonyms(rec, taxon) click to toggle source

rubocop:disable Metrics/AbcSize

# File lib/dwca_hunter/resources/mammal_species.rb, line 78
# Adds the record's synonyms to the core table as "synonym" rows pointing at
# the accepted taxon. Only species and subspecies rows are processed.
#
# rec   - Hash of one CSV row; :synonyms holds a ";"-separated list and may
#         be nil when the cell is empty.
# taxon - core row of the accepted taxon (id at index 0, parent id at
#         index 2, rank as the last element).
#
# Names that real_name? does not accept are reported on stdout and skipped.
def process_synonyms(rec, taxon)
  accepted_id = taxon[0]
  parent_id = taxon[2]
  rank = taxon[-1]
  return unless %w[species subspecies].include? rank

  # to_s: an empty synonyms cell is nil and used to raise NoMethodError here.
  synonyms = rec[:synonyms].to_s.gsub(/\.$/, "").
             gsub(%r{<[/ibsup]+>}, "").gsub(/[\s]+/, " ").
             split(";").map(&:strip)
  synonyms.each do |s|
    # NOTE(review): "<u>" tags are already removed by the tag-stripping gsub
    # above, so this check looks like it can never match -- confirm intent.
    next if s =~ /<u>/
    # A synonym given as a bare epithet gets the record's genus in front.
    s = "#{rec[:genus]} #{s}" if s =~ /^[a-z]/
    # The id is consumed even for rejected names, as before, so ids of
    # accepted rows stay the same.
    @count += 1
    id = @count
    if real_name?(s)
      @core << [id, nil, parent_id, accepted_id, s, "synonym", rank]
    else
      puts "Rejected: #{s}"
    end
  end
end
process_vernaculars(rec, taxon) click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 68
# Adds the record's common name, if it has one, to the first extension's data
# table as an English ("en") name for the given taxon. U+0092 characters in
# the name are replaced with a plain apostrophe.
def process_vernaculars(rec, taxon)
  common_name = rec[:commonname]
  return if common_name.to_s == ""

  vernacular_rows = @extensions[0][:data]
  vernacular_rows << [taxon[0], common_name.tr("\u{0092}", "'"), "en"]
end
real_name?(str) click to toggle source

rubocop:enable Metrics/AbcSize

# File lib/dwca_hunter/resources/mammal_species.rb, line 101
# Tells whether +str+ is acceptable as a name: @parser must report it as
# parsed, its simple canonical form must have at least one word after the
# first one, and none of those words may be a key of the @black_sp list.
def real_name?(str)
  result = @parser.parse(str)
  return false unless result[:parsed]

  # Drop the first word; what remains are the epithets to check.
  _first, *epithets = result[:canonicalName][:simple].split(" ")
  return false if epithets.empty?

  epithets.none? { |epithet| @black_sp[epithet] }
end