class PaleodbHarvester
Public Class Methods
new(download_dir)
click to toggle source
# File lib/dwca_hunter/resources/paleodb_harvester.rb, line 2
# Sets up the harvester: creates the "json" output directory inside
# +download_dir+ and opens the four input CSV files (with header rows)
# that the export methods iterate over.
#
# download_dir - directory that holds taxa.csv, refs.csv, taxa_refs.csv and
#                occurences.csv; the JSON output goes to <download_dir>/json.
#
# The CSV handles are stored in instance variables and stay open; nothing in
# this method closes them.
def initialize(download_dir)
  @in_dir = download_dir
  @dir = File.join(@in_dir, "json")
  FileUtils.mkdir_p(@dir)

  open_csv = ->(file_name) { CSV.open(File.join(@in_dir, file_name), headers: true) }

  @taxa_csv = open_csv.call("taxa.csv")
  @refs_csv = open_csv.call("refs.csv")
  @taxa_refs_csv = open_csv.call("taxa_refs.csv")
  @occurences_csv = open_csv.call("occurences.csv")
end
Public Instance Methods
details(row)
click to toggle source
# File lib/dwca_hunter/resources/paleodb_harvester.rb, line 90
# Assembles the publication-details part of a reference from one record,
# in the form "<pubtitle> <pubno>: <firstpage>--<lastpage> (<doi>)".
# Each piece other than the title is added only when its field is non-empty.
#
# @param row [#[]] record read by the string keys "pubtitle", "pubno",
#   "firstpage", "lastpage" and "doi" (a CSV row or a Hash)
# @return [String] the details string with runs of whitespace collapsed to
#   a single space and surrounding whitespace removed
def details(row)
  # Missing (nil) fields are treated as empty strings. Calling `empty?`
  # directly on a nil field would raise NoMethodError.
  pubtitle, pubno, firstpage, lastpage, doi =
    %w[pubtitle pubno firstpage lastpage doi].map { |key| row[key].to_s.strip }

  parts = [pubtitle]
  parts << " #{pubno}" unless pubno.empty?
  parts << ": #{firstpage}" unless firstpage.empty?
  parts << "--#{lastpage}" unless lastpage.empty?
  parts << " (#{doi})" unless doi.empty?

  parts.join.gsub(/\s{2,}/, " ").strip
end
ecol(row)
click to toggle source
# File lib/dwca_hunter/resources/paleodb_harvester.rb, line 52
# Combines the "life_habit" and "diet" fields of a record into one
# space-separated string.
#
# @param row [#[]] record read by the string keys "life_habit" and "diet"
# @return [String] "<life_habit> <diet>"; a missing or blank field is left
#   out, so the result never carries stray leading/trailing spaces and is ""
#   when both fields are absent
def ecol(row)
  %w[life_habit diet]
    .map { |key| row[key].to_s.strip }
    .reject(&:empty?)
    .join(" ")
end
enterer(r)
click to toggle source
# File lib/dwca_hunter/resources/paleodb_harvester.rb, line 41
# Lists the people recorded on a row: the "enterer" and "modifier" values,
# converted to strings, trimmed, de-duplicated and with blanks dropped.
#
# r - record read by the string keys "enterer" and "modifier".
#
# Returns the remaining names joined with ", " ("" when there are none).
def enterer(r)
  names = %w[enterer modifier].map { |key| r[key].to_s.strip }
  names.uniq.reject(&:empty?).join(", ")
end
extinct(val)
click to toggle source
# File lib/dwca_hunter/resources/paleodb_harvester.rb, line 48
# Turns a status value into a numeric flag.
#
# val - value to test; only the exact string "extinct" counts as extinct.
#
# Returns 1 when val equals "extinct", otherwise 0.
def extinct(val)
  return 1 if val == "extinct"

  0
end
occurences()
click to toggle source
# File lib/dwca_hunter/resources/paleodb_harvester.rb, line 117
# Reads every row of the occurrences CSV, groups the rows by their
# "accepted_no" value and writes the grouping to "occurences.json" in the
# output directory as pretty-printed JSON.
#
# Each stored entry has the keys :id ("accepted_no"), :name
# ("accepted_name"), :country ("cc"), :state ("state"), :age_min ("min_ma")
# and :age_max ("max_ma"), all taken from the whitespace-stripped row.
#
# @return [nil]
def occurences
  # The default block creates the per-key list on first access.
  occ = Hash.new { |groups, key| groups[key] = [] }

  @occurences_csv.each do |r|
    r = strip r
    occ[r["accepted_no"]] << {
      id: r["accepted_no"],
      name: r["accepted_name"],
      country: r["cc"],
      state: r["state"],
      age_min: r["min_ma"],
      age_max: r["max_ma"]
    }
  end

  # Block form closes the file even if serialization or writing raises.
  File.open(File.join(@dir, "occurences.json"), "w:utf-8") do |f|
    f.write(JSON.pretty_generate(occ))
  end
  nil
end
refs()
click to toggle source
# File lib/dwca_hunter/resources/paleodb_harvester.rb, line 57
# Converts every row of the references CSV into a record keyed by its
# "reference_no" and writes the result to "refs.json" in the output
# directory as pretty-printed JSON.
#
# Each record has the keys :id, :author, :authorship, :year ("pubyr"),
# :title ("reftitle") and :details (built by `details`).
#
# NOTE(review): `authors` is defined outside the code visible here; it is
# used as returning a two-element [authorship, author] pair - confirm.
#
# @return [nil]
def refs
  # Columns listed by the original author:
  # "reference_no","record_type","ref_type","author1init","author1last",
  # "author2init","author2last","otherauthors","pubyr","reftitle","pubtitle",
  # "editors","pubvol","pubno","firstpage","lastpage","publication_type",
  # "language","doi"
  #
  # Sample record kept from the original comment (it contains keys such as
  # "orig", "distribution" and "comment" that this method does not emit):
  # {"id":31671,"orig":true,"author":"Hahn, C. W.",
  # "year":1834,"title":"Die wanzenartigen Insecten.",
  # "details":"C. H. Zeh, Nurnberg. 2: 33--120.",
  # "distribution":"Germany","comment":"n. sp."}
  references = {}

  @refs_csv.each do |r|
    r = strip r
    authorship, author = authors(r)
    references[r["reference_no"]] = {
      id: r["reference_no"],
      author: author,
      authorship: authorship,
      year: r["pubyr"],
      title: r["reftitle"],
      details: details(r)
    }
  end

  # Block form closes the file even if serialization or writing raises.
  File.open(File.join(@dir, "refs.json"), "w:utf-8") do |f|
    f.write(JSON.pretty_generate(references))
  end
  nil
end
strip(row)
click to toggle source
# File lib/dwca_hunter/resources/paleodb_harvester.rb, line 134
# Copies a record into a plain Hash, trimming surrounding whitespace from
# every value.
#
# row - enumerable of key/value pairs (for example a CSV row or a Hash).
#
# Returns a new Hash with the same keys; nil values stay nil, every other
# value is replaced by the result of calling `strip` on it.
def strip(row)
  cleaned = {}
  row.each do |key, value|
    cleaned[key] = value&.strip
  end
  cleaned
end
taxa()
click to toggle source
# File lib/dwca_hunter/resources/paleodb_harvester.rb, line 12
# Converts every row of the taxa CSV into two lookups and writes both to
# the output directory as pretty-printed JSON:
#
# * "taxa.json"    - one record per row, keyed by "taxon_no"
# * "name_id.json" - "taxon_name" => { id: "taxon_no", acc_id: "accepted_no" }
#
# Rows that share a "taxon_no" or a "taxon_name" overwrite earlier entries
# under that key.
#
# @return [nil]
def taxa
  # Columns listed by the original author:
  # "orig_no","taxon_no","record_type","flags","taxon_rank",
  # "taxon_name","difference","accepted_no","accepted_rank",
  # "accepted_name","parent_no","reference_no","is_extant","n_occs"
  taxa_by_id = {}
  name2id = {}

  @taxa_csv.each do |r|
    r = strip(r)
    taxa_by_id[r["taxon_no"]] = {
      t_id: r["orig_no"],
      id: r["taxon_no"],
      rank: r["taxon_rank"],
      name: r["taxon_name"],
      auth: r["taxon_attr"],
      extinct: extinct(r["is_extant"]),
      vernacular: r["common_name"],
      annot: r["difference"],
      acc_id: r["accepted_no"],
      acc_rank: r["accepted_rank"],
      acc_name: r["accepted_name"],
      ecol: ecol(r),
      parent_id: r["parent_no"],
      ref: r["reference_no"],
      occs_num: r["n_occs"],
      enterer: enterer(r)
    }
    name2id[r["taxon_name"]] = { id: r["taxon_no"], acc_id: r["accepted_no"] }
  end

  # Block form closes each file even if serialization or writing raises.
  File.open(File.join(@dir, "taxa.json"), "w:utf-8") do |f|
    f.write(JSON.pretty_generate(taxa_by_id))
  end
  File.open(File.join(@dir, "name_id.json"), "w:utf-8") do |f|
    f.write(JSON.pretty_generate(name2id))
  end
  nil
end
taxa_refs()
click to toggle source
# File lib/dwca_hunter/resources/paleodb_harvester.rb, line 100
# Reads every row of the taxa/references CSV, groups the rows by their
# "accepted_no" value and writes the grouping to "taxa_refs.json" in the
# output directory as pretty-printed JSON.
#
# Each stored entry has the keys :acc_id ("accepted_no"), :name
# ("accepted_name") and :ref_id ("reference_no"), taken from the
# whitespace-stripped row.
#
# @return [nil]
def taxa_refs
  # The default block creates the per-key list on first access.
  tr = Hash.new { |groups, key| groups[key] = [] }

  @taxa_refs_csv.each do |r|
    r = strip r
    tr[r["accepted_no"]] << {
      acc_id: r["accepted_no"],
      name: r["accepted_name"],
      ref_id: r["reference_no"]
    }
  end

  # Block form closes the file even if serialization or writing raises.
  File.open(File.join(@dir, "taxa_refs.json"), "w:utf-8") do |f|
    f.write(JSON.pretty_generate(tr))
  end
  nil
end