class DwcaHunter::ResourceOpenTree
Harvesting resource for Open Tree of Life
Public Class Methods
new(opts = {})
click to toggle source
Calls superclass method
DwcaHunter::Resource::new
# File lib/dwca_hunter/resources/opentree.rb, line 6
# Configures the Open Tree of Life resource: command name, title, UUID,
# download location, empty data containers, the header row of the core
# file, and the EML metadata hash. Finishes by calling the superclass
# initializer.
#
# opts - options Hash; the bare `super` at the end forwards it unchanged
#        to the superclass initializer.
def initialize(opts = {})
  @command = "open-tree"
  @title = "Open Tree of Life Reference Taxonomy"
  @uuid = "e10865e2-cdd9-4f97-912f-08f3d5ef49f7"
  # Must be assigned before @eml is built: the hash below stores the value
  # of @url at construction time, which would otherwise be nil.
  @url = "http://files.opentreeoflife.org/ott/ott3.2/ott3.2.tgz"
  @data = []
  @extensions = []
  @count = 1
  @clades = {}
  # Header row of the core file; data rows are appended after it.
  # NOTE(review): scientificName, parentNameUsageId and taxonRank use the
  # purl.org/dc/terms namespace while taxonId uses rs.tdwg.org/dwc/terms --
  # confirm that this mix is intended.
  @core = [["http://rs.tdwg.org/dwc/terms/taxonId",
            "http://globalnames.org/terms/localID",
            "http://purl.org/dc/terms/scientificName",
            "http://purl.org/dc/terms/parentNameUsageId",
            "http://purl.org/dc/terms/taxonRank",
            "http://globalnames.org/ottCrossMaps",
            "http://globalnames.org/ottNotes"]]
  @eml = {
    id: @uuid,
    title: @title,
    authors: [{ url: "https://tree.opentreeoflife.org" }],
    # Each fragment ends with a space so the joined sentence keeps its
    # word boundaries.
    abstract: "Open Tree of Life aims to construct a comprehensive, " \
              "dynamic and digitally-available tree of life by " \
              "synthesizing published phylogenetic trees along with " \
              "taxonomic data. The project is a collaborative effort " \
              "between 11 PIs across 10 institutions.",
    metadata_providers: [
      { first_name: "Dmitry",
        last_name: "Mozzherin",
        email: "dmozzherin@gmail.com" }
    ],
    url: @url
  }
  @download_path = File.join(Dir.tmpdir, "dwca_hunter", "opentree",
                             "data.tar.gz")
  super
end
Public Instance Methods
download()
click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 53
# Downloads the archive found at `url` into @download_path using curl.
#
# Always prints a reminder to standard output. Does nothing further and
# returns nil unless @needs_download is truthy; otherwise returns whatever
# curl wrote to its standard output, as a String.
def download
  puts "Downloading cached data, update it at oot website!!"
  return unless @needs_download

  DwcaHunter.logger_write(object_id, "Downloading file -- " \
                                     "it will take some time...")
  # Argument-vector form: curl is started directly, without a shell, so
  # whitespace or shell metacharacters in the URL or in the target path are
  # handed to curl verbatim instead of being split or interpreted.
  IO.popen(["curl", "-L", url.to_s, "-o", @download_path.to_s], &:read)
end
make_dwca()
click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 47
# Builds the archive in two steps: logs that extraction has started,
# gathers the data with collect_data, then hands over to generate_dwca.
# The result of generate_dwca is the return value.
def make_dwca
  DwcaHunter.logger_write(object_id, "Extracting data")

  collect_data
  generate_dwca
end
unpack()
click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 43
# Unpacks the downloaded tarball via unpack_tar, but only when
# @needs_unpack is truthy. Returns nil when unpacking is skipped,
# otherwise the result of unpack_tar.
def unpack
  return unless @needs_unpack

  unpack_tar
end
Private Instance Methods
classification()
click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 73
# Reads the file at @taxonomy line by line and stores every line in
# @classification as an Array of fields: the line is split on "|" and each
# field is stripped of surrounding whitespace. Also resets @names to an
# empty Hash. Progress is logged every BATCH_SIZE lines.
#
# NOTE(review): every line is kept, including the first one -- if the
# taxonomy file starts with a header row it ends up in @classification as
# a record; confirm against the data file.
def classification
  @classification = []
  @names = {}
  DwcaHunter.logger_write(object_id, "Building classification")
  # File.foreach closes the file once iteration finishes, and always treats
  # its argument as a file path. The bare Kernel#open used previously left
  # the handle open and would run a path starting with "|" as a command.
  File.foreach(@taxonomy).with_index(1) do |line, line_no|
    if (line_no % BATCH_SIZE).zero?
      DwcaHunter.logger_write(object_id,
                              "Traversed #{line_no} taxonomy lines")
    end
    @classification << line.split("|").map(&:strip)
  end
end
collect_data()
click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 63
# Prepares the input data: set_vars resolves the input file paths first,
# then classification loads the taxonomy. Returns whatever classification
# returns.
def collect_data
  set_vars

  classification
end
generate_core()
click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 94
# Appends one row to @core for every entry of @classification.
#
# The first field of an entry is written twice (first and second output
# column), and the second and third fields swap places; the remaining
# three fields follow in order. Progress is logged whenever @count is a
# multiple of BATCH_SIZE.
def generate_core
  @classification.each do |row|
    if (@count % BATCH_SIZE).zero?
      DwcaHunter.logger_write(object_id, "Traversing #{@count} core " \
                                         "data record")
    end
    # Without this increment the counter never changed, so the progress
    # message above could never track the number of processed records.
    @count += 1
    @core << [row[0], row[0], row[2], row[1], row[3], row[4], row[5]]
  end
end
generate_dwca()
click to toggle source
Calls superclass method
DwcaHunter::Resource#generate_dwca
# File lib/dwca_hunter/resources/opentree.rb, line 86
# Assembles the archive contents and delegates the actual writing to the
# superclass: logs two progress messages, fills the core rows, registers
# the synonyms extension, then calls the inherited generate_dwca.
def generate_dwca
  DwcaHunter.logger_write(object_id, "Creating DarwinCore Archive file")
  DwcaHunter.logger_write(object_id, "Assembling Core Data")

  generate_core
  generate_synonyms

  super
end
generate_synonyms()
click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 108
# Registers a "synonyms.txt" extension in @extensions and fills it with
# one row per entry returned by synonyms. The first row of the extension
# is the header listing the three column terms.
#
# Each synonym is read through the keys :id, :scientificName and
# :taxonomicStatus.
# NOTE(review): the id column previously referenced an undefined local
# `d`, which raises NameError as soon as synonyms is non-empty; :id on the
# synonym itself is assumed to be the intended value -- confirm once
# synonyms returns real data.
def generate_synonyms
  rows = [["http://rs.tdwg.org/dwc/terms/taxonId",
           "http://rs.tdwg.org/dwc/terms/scientificName",
           "http://rs.tdwg.org/dwc/terms/taxonomicStatus"]]
  @extensions << { data: rows, file_name: "synonyms.txt" }
  synonyms.each do |synonym|
    # Append to the extension created above rather than to
    # @extensions.first, which is a different extension whenever
    # @extensions was not empty beforehand.
    rows << [synonym[:id], synonym[:scientificName],
             synonym[:taxonomicStatus]]
  end
end
set_vars()
click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 68
# Resolves the paths of the two input files inside the "ott3.2" directory
# under @download_dir and stores them in @taxonomy and @synonyms.
def set_vars
  ott_dir = File.join(@download_dir, "ott3.2")

  @taxonomy = File.join(ott_dir, "taxonomy.tsv")
  @synonyms = File.join(ott_dir, "synonyms.tsv")
end
synonyms()
click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 104
# Source of the records consumed by generate_synonyms. Always returns a
# new empty Array, so no synonym rows are produced.
def synonyms
  []
end