class DwcaHunter::ResourceMammalSpecies
ResourceMammalSpecies
Converts “Mammal Species of the World” data to a DarwinCore Archive file.
Public Class Methods
new(opts = {})
click to toggle source
Calls superclass method
DwcaHunter::Resource::new
# File lib/dwca_hunter/resources/mammal_species.rb, line 7
# Sets up the resource: parser, species blacklist, resource identity
# (command, title, uuid), empty data/extension accumulators, the id counter
# with the root "Mammalia" clade, and the source URL / local download path.
# Finishes by delegating to the superclass initializer with the same
# arguments.
#
# @param opts [Hash] options forwarded unchanged to the superclass
def initialize(opts = {})
  # Name parsing and the blacklist of epithets (used by real_name?).
  @parser = Biodiversity::Parser
  @black_sp = black_species

  # Identity of this resource.
  @command = "mammal-species"
  @title = "The Mammal Species of The World"
  @uuid = "464dafec-1037-432d-8449-c0b309e0a030"

  # Accumulators filled while the archive is built.
  @data = []
  @extensions = []

  # Ids are handed out sequentially; the root clade takes the first one.
  @count = 1
  @clades = { "Mammalia" => { rank: "class", id: @count } }

  # Where the CSV export comes from and where it is stored locally.
  @url = "http://www.departments.bucknell.edu/biology/resources/msw3/export.asp"
  @download_path = File.join(Dir.tmpdir, "dwca_hunter", "mammalsp",
                             "msw3-all.csv")
  super
end
Public Instance Methods
download()
click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 35
# Logs a progress message and fetches the source file from `url` into
# @download_path using DwcaHunter::Downloader.
def download
  message = "Downloading file -- it will take some time..."
  DwcaHunter.logger_write(object_id, message)
  DwcaHunter::Downloader.new(url, @download_path).download
end
make_dwca()
click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 28
# Runs the conversion pipeline in order: re-encode the downloaded file,
# load its rows into @data, then build the DarwinCore Archive.
def make_dwca
  DwcaHunter.logger_write(object_id, "Extracting data")

  encode        # step 1: convert the download to UTF-8
  collect_data  # step 2: read the rows into memory
  generate_dwca # step 3: assemble the archive
end
needs_unpack?()
click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 24
# Tells the caller whether the downloaded file has to be unpacked.
#
# @return [Boolean] always false for this resource
def needs_unpack?
  false
end
Private Instance Methods
adjust_clade(rec, rank, clade)
click to toggle source
rubocop:enable Metrics/AbcSize
# File lib/dwca_hunter/resources/mammal_species.rb, line 149
# Returns the name to use for a clade at the given rank.
#
# For :species and :subspecies the name is rebuilt from the record's genus,
# species and (for subspecies only) subspecies fields, with runs of
# whitespace collapsed to single spaces. For every other rank the supplied
# clade is returned untouched.
#
# @param rec [Hash] record with :genus, :species and :subspecies keys
# @param rank [Symbol] rank being processed
# @param clade [String] raw clade value for that rank
# @return [String] the full name for species/subspecies, otherwise `clade`
def adjust_clade(rec, rank, clade)
  return clade unless %i[species subspecies].include?(rank)

  parts = [rec[:genus], rec[:species]]
  parts << rec[:subspecies] if rank == :subspecies
  parts.join(" ").gsub(/[\s]+/, " ").strip
end
black_species()
click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 194
# Downloads the list of blacklisted species epithets and returns it as a
# lookup table.
#
# The previous version passed the block to URI.parse, which only builds a
# URI object and never yields, so the list was never fetched and the result
# was always empty. The URI is now opened through open-uri.
#
# @return [Hash{String => Integer}] each stripped line of the remote file
#   mapped to 1
# @raise [OpenURI::HTTPError, SocketError] and other network errors raised
#   by open-uri when the list cannot be retrieved
def black_species
  # Loaded here so the method works even if the file does not require it.
  require "open-uri"

  res = {}
  list_url = "https://www.dropbox.com/s/jl7sc7whuidsu8w/species-black.txt?dl=1"
  URI.parse(list_url).open do |f|
    f.each_line { |l| res[l.strip] = 1 }
  end
  res
end
collect_data()
click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 48
# Reads the UTF-8 copy of the downloaded CSV (@download_path + ".utf_8")
# and appends every row to @data as a Hash keyed by symbolized headers.
#
# CSV.foreach is used instead of a block-less CSV.open so the file handle is
# closed when reading finishes, and the options are passed as keywords
# (**opts) so they are not mistaken for the positional `mode` argument on
# Ruby 3.
def collect_data
  opts = { headers: true, header_converters: :symbol }
  CSV.foreach(@download_path + ".utf_8", **opts) do |row|
    @data << row.to_hash
  end
end
core_init()
click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 182
# Starts the core table (@core) with its header row of term URIs followed by
# the row for the root clade "Mammalia", whose id is taken from @clades.
#
# Row layout: taxonID, localID, parentNameUsageID, acceptedNameUsageID,
# scientificName, taxonomicStatus, taxonRank.
def core_init
  header = [
    "http://rs.tdwg.org/dwc/terms/taxonID",
    "http://globalnames.org/terms/localID",
    "http://rs.tdwg.org/dwc/terms/parentNameUsageID",
    "http://rs.tdwg.org/dwc/terms/acceptedNameUsageID",
    "http://rs.tdwg.org/dwc/terms/scientificName",
    "http://rs.tdwg.org/dwc/terms/taxonomicStatus",
    "http://rs.tdwg.org/dwc/terms/taxonRank"
  ]
  root_id = @clades["Mammalia"][:id]
  root_row = [root_id, nil, nil, root_id, "Mammalia", nil, "class"]
  @core = [header, root_row]
end
eml_init()
click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 159
# Builds the EML metadata hash (@eml) for the archive from @uuid and @title
# plus fixed author, metadata-provider, abstract and URL values.
#
# The second author's last name was misspelled "Reader"; it is corrected to
# "Reeder", matching the editor named in the abstract below.
def eml_init
  @eml = {
    id: @uuid,
    title: @title,
    authors: [
      { first_name: "Don",
        last_name: "Wilson" },
      { first_name: "DeeAnn",
        last_name: "Reeder" }
    ],
    metadata_providers: [
      { first_name: "Dmitry",
        last_name: "Mozzherin",
        email: "dmozzherin@gmail.com" }
    ],
    abstract: "Mammal Species of the World, 3rd edition (MSW3) is "\
              "a database of mammalian taxonomy, based upon the 2005 book "\
              "Mammal Species of the World. A Taxonomic and Geographic Reference "\
              "(3rd ed). Don E. Wilson & DeeAnn M. Reeder (editors).",
    url: "http://www.vertebrates.si.edu/msw/mswcfapp/msw/index.cfm"
  }
end
encode()
click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 44
# Hands the downloaded file to DwcaHunter::Encoding.latin1_to_utf8.
# NOTE(review): presumably this writes the "<path>.utf_8" copy that
# collect_data reads afterwards -- confirm against DwcaHunter::Encoding.
def encode
  source_file = @download_path
  DwcaHunter::Encoding.latin1_to_utf8(source_file)
end
extensions_init()
click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 206
# Registers the vernacular-names extension in @extensions. Its data table
# starts with only the header row (taxonID, vernacularName, language);
# process_vernaculars appends the actual rows later.
def extensions_init
  header = ["http://rs.tdwg.org/dwc/terms/taxonID",
            "http://rs.tdwg.org/dwc/terms/vernacularName",
            "http://purl.org/dc/terms/language"]
  vernacular_extension = {
    data: [header],
    file_name: "vernacular_names.txt",
    row_type: "http://rs.gbif.org/terms/1.0/VernacularName"
  }
  @extensions << vernacular_extension
end
generate_dwca()
click to toggle source
Calls superclass method
DwcaHunter::Resource#generate_dwca
# File lib/dwca_hunter/resources/mammal_species.rb, line 55
# Builds the archive content: initializes the core table, the extensions and
# the EML metadata, then turns every collected record into core rows,
# vernacular names and synonyms before delegating to the superclass
# implementation.
def generate_dwca
  DwcaHunter.logger_write(object_id, "Creating DarwinCore Archive file")

  core_init
  extensions_init
  eml_init

  @data.each do |record|
    # The core row for the record drives the ids used by the other two steps.
    core_row = process_hierarchy(record)
    process_vernaculars(record, core_row)
    process_synonyms(record, core_row)
  end

  super
end
process_hierarchy(rec)
click to toggle source
rubocop:disable Metrics/AbcSize
# File lib/dwca_hunter/resources/mammal_species.rb, line 121
# Walks the record's classification from order down to subspecies, creating
# a core row (and an @clades entry) for every clade not seen before, and
# returns the core row that represents the record itself.
#
# A clade created at or below the record's own rank (rec[:taxonlevel]) is
# completed with author/date/local id by process_name and returned
# immediately.
#
# Previously, when no such row was created (e.g. the record's clade already
# existed), the method fell out of the loop and returned the rank list
# produced by `each`; callers then indexed that list as if it were a core
# row. It now returns the existing core row of the deepest clade resolved
# for the record. A missing :taxonlevel no longer raises.
#
# @param rec [Hash] record keyed by symbolized CSV headers
# @return [Array] core row: [id, local_id, parent_id, accepted_id, name,
#   status, rank]
def process_hierarchy(rec)
  parent_id = @clades["Mammalia"][:id]
  row_rank = rec[:taxonlevel].to_s.downcase.to_sym
  is_row_rank = false
  %i[order suborder infraorder superfamily family subfamily tribe genus
     subgenus species subspecies].each do |rank|
    # Sticky on purpose: stays true for every rank below the record's own.
    is_row_rank = true if rank == row_rank
    clade = rec[rank]
    # All-caps values are normalized to capitalized form.
    clade = clade.capitalize if clade =~ /^[A-Z]+$/
    next if clade.to_s == ""

    clade = adjust_clade(rec, rank, clade)
    if @clades.key?(clade)
      # Known clade: it simply becomes the parent of whatever follows.
      parent_id = @clades[clade][:id]
      next
    end

    @count += 1
    clade_id = @count
    @clades[clade] = { id: clade_id, rank: rank }
    @core << [clade_id, nil, parent_id, clade_id, clade, nil, rank.to_s]
    if is_row_rank
      process_name(rec)
      return @core.last
    end
    parent_id = clade_id
  end
  # Nothing new at the record's rank: parent_id now holds the id of the
  # deepest clade resolved, so hand back that clade's existing core row.
  @core.find { |row| row[0] == parent_id }
end
process_name(rec)
click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 112
# Completes the most recently added core row: appends the record's author
# and date to the scientific name (column 4), drops markup tags matched by
# the tag pattern, collapses whitespace, and stores the record's id as the
# local id (column 1).
#
# @param rec [Hash] record with :author, :date and :id keys
def process_name(rec)
  row = @core.last
  full_name = [row[4], rec[:author], rec[:date]].join(" ")
  without_tags = full_name.gsub(%r{<[/ibsup]+>}, "")
  row[4] = without_tags.gsub(/[\s]+/, " ").strip
  row[1] = rec[:id]
end
process_synonyms(rec, taxon)
click to toggle source
rubocop:disable Metrics/AbcSize
# File lib/dwca_hunter/resources/mammal_species.rb, line 78
# Adds a "synonym" core row for every acceptable synonym listed in the
# record. Only records whose core row has rank "species" or "subspecies"
# are processed.
#
# The synonyms field is a ";"-separated list; a trailing period and markup
# tags are removed and whitespace is collapsed before splitting. A synonym
# starting with a lowercase letter is treated as a bare epithet and prefixed
# with the record's genus. Names rejected by real_name? are reported on
# stdout and skipped.
#
# An empty synonyms cell (nil) is now treated as "no synonyms" instead of
# raising NoMethodError, and the list is iterated with `each` because the
# loop is used only for its side effects.
#
# @param rec [Hash] record with :synonyms and :genus keys
# @param taxon [Array] core row of the accepted taxon
def process_synonyms(rec, taxon)
  accepted_id = taxon[0]
  parent_id = taxon[2]
  rank = taxon[-1]
  return unless %w[species subspecies].include? rank

  synonyms = rec[:synonyms].to_s.gsub(/\.$/, "").
             gsub(%r{<[/ibsup]+>}, "").gsub(/[\s]+/, " ").split(";")
  synonyms.map(&:strip).each do |s|
    # NOTE(review): the tag pattern above already strips "<u>", so this
    # guard looks unreachable in practice -- kept to preserve behavior.
    next if s =~ /<u>/

    s = "#{rec[:genus]} #{s}" if s =~ /^[a-z]/
    # The id is consumed even when the name is rejected (as before).
    @count += 1
    id = @count
    if real_name?(s)
      @core << [id, nil, parent_id, accepted_id, s, "synonym", rank]
    else
      puts "Rejected: #{s}"
    end
  end
end
process_vernaculars(rec, taxon)
click to toggle source
# File lib/dwca_hunter/resources/mammal_species.rb, line 68
# Appends the record's common name, if it has one, to the data of the first
# extension as [taxon id, name, "en"]. The U+0092 character is replaced by a
# plain apostrophe.
#
# @param rec [Hash] record with a :commonname key
# @param taxon [Array] core row of the taxon; its first element is the id
def process_vernaculars(rec, taxon)
  vernacular = rec[:commonname]
  return if vernacular.to_s.empty?

  row = [taxon[0], vernacular.tr("\u{0092}", "'"), "en"]
  @extensions[0][:data] << row
end
real_name?(str)
click to toggle source
rubocop:enable Metrics/AbcSize
# File lib/dwca_hunter/resources/mammal_species.rb, line 101
# Decides whether a string is an acceptable scientific name: it must be
# parsed by @parser, its simple canonical form must contain at least one
# word after the first, and none of those words may be in the @black_sp
# blacklist.
#
# @param str [String] candidate name
# @return [Boolean]
def real_name?(str)
  result = @parser.parse(str)
  return false unless result[:parsed]

  words = result[:canonicalName][:simple].split(" ")
  epithets = words[1..-1]
  return false if epithets.nil? || epithets.empty?

  epithets.none? { |epithet| @black_sp[epithet] }
end