class DwcaHunter::ResourceOpenTree

Harvesting resource for the Open Tree of Life reference taxonomy.

Public Class Methods

new(opts = {}) click to toggle source
Calls superclass method DwcaHunter::Resource::new
# File lib/dwca_hunter/resources/opentree.rb, line 6
# Configures the Open Tree of Life resource (command name, title, UUID,
# download location, core header row and EML metadata) and then delegates
# to the superclass initializer.
#
# opts - options hash; it is not read here and is forwarded unchanged to the
#        superclass by the bare +super+ call.
def initialize(opts = {})
  @command = "open-tree"
  @title = "Open Tree of Life Reference Taxonomy"
  @uuid = "e10865e2-cdd9-4f97-912f-08f3d5ef49f7"
  # @url has to be assigned before @eml is built: the hash stores the value
  # @url holds at construction time, so assigning it afterwards left
  # @eml[:url] nil.
  @url = "http://files.opentreeoflife.org/ott/ott3.2/ott3.2.tgz"
  @download_path = File.join(Dir.tmpdir, "dwca_hunter",
                             "opentree", "data.tar.gz")
  @data = []
  @extensions = []
  @count = 1
  @clades = {}
  # Header row of the core file; generate_core appends data rows in this
  # column order.
  # NOTE(review): taxonId uses the rs.tdwg.org/dwc/terms namespace while
  # scientificName, parentNameUsageId and taxonRank use purl.org/dc/terms --
  # confirm this mix is intended.
  @core = [["http://rs.tdwg.org/dwc/terms/taxonId",
            "http://globalnames.org/terms/localID",
            "http://purl.org/dc/terms/scientificName",
            "http://purl.org/dc/terms/parentNameUsageId",
            "http://purl.org/dc/terms/taxonRank",
            "http://globalnames.org/ottCrossMaps",
            "http://globalnames.org/ottNotes"]]
  @eml = {
    id: @uuid,
    title: @title,
    authors: [{ url: "https://tree.opentreeoflife.org" }],
    # Every fragment except the last ends with a space so the joined
    # sentence keeps its word boundaries.
    abstract: "Open Tree of Life aims to construct a comprehensive, " \
              "dynamic and digitally-available tree of life by " \
              "synthesizing published phylogenetic trees along with " \
              "taxonomic data. The project is a collaborative effort " \
              "between 11 PIs across 10 institutions.",
    metadata_providers: [
      { first_name: "Dmitry",
        last_name: "Mozzherin",
        email: "dmozzherin@gmail.com" }
    ],
    url: @url
  }
  super
end

Public Instance Methods

download() click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 53
# Prints a notice about the cached data and, when @needs_download is set,
# logs a progress message and fetches +url+ into @download_path with curl.
#
# Returns nil when no download is needed, otherwise the output captured from
# the curl command.
def download
  puts "Downloading cached data, update it at oot website!!"
  if @needs_download
    notice = "Downloading file -- it will take some time..."
    DwcaHunter.logger_write(object_id, notice)
    %x(curl -L #{url} -o #{@download_path})
  end
end
make_dwca() click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 47
# Logs the start of extraction, gathers the input data via collect_data and
# then builds the archive via generate_dwca.
#
# Returns whatever generate_dwca returns.
def make_dwca
  stage = "Extracting data"
  DwcaHunter.logger_write(object_id, stage)

  collect_data
  generate_dwca
end
unpack() click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 43
# Unpacks the downloaded tarball via unpack_tar, but only when @needs_unpack
# is set.
#
# Returns nil when nothing is unpacked, otherwise the result of unpack_tar.
def unpack
  return unless @needs_unpack

  unpack_tar
end

Private Instance Methods

classification() click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 73
# Reads the taxonomy file at @taxonomy into @classification, one entry per
# line, each entry being the line's "|"-separated fields with surrounding
# whitespace stripped. Also resets @names to an empty hash.
#
# A progress message is logged every BATCH_SIZE lines.
#
# Returns the populated @classification array.
def classification
  @classification = []
  @names = {}
  DwcaHunter.logger_write(object_id, "Building classification")
  # File.foreach closes the file once iteration finishes and, unlike
  # Kernel#open, never interprets a leading "|" in the path as a command.
  File.foreach(@taxonomy).with_index(1) do |line, line_number|
    if (line_number % BATCH_SIZE).zero?
      DwcaHunter.logger_write(object_id,
                              "Traversed #{line_number} taxonomy lines")
    end
    @classification << line.split("|").map(&:strip)
  end
  @classification
end
collect_data() click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 63
# Gathers the input needed to build the archive: set_vars resolves the
# paths of the extracted taxonomy and synonyms files, then classification
# loads the taxonomy rows from the resolved path.
#
# Returns whatever classification returns.
def collect_data
  set_vars
  classification
end
generate_core() click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 94
# Appends one core row to @core for every entry of @classification.
#
# Source fields are picked by position as 0, 0, 2, 1, 3, 4, 5, so field 0
# fills both of the first two core columns and fields 1 and 2 swap places.
# A progress message is logged every BATCH_SIZE records; @count holds the
# 1-based number of the record being processed.
#
# Returns @classification (the receiver of +each+).
def generate_core
  @classification.each do |fields|
    if (@count % BATCH_SIZE).zero?
      DwcaHunter.logger_write(object_id, "Traversing #{@count} core " \
                              "data record")
    end
    @core << fields.values_at(0, 0, 2, 1, 3, 4, 5)
    # Advance the counter; without this the progress check above always saw
    # the initial value and never reported real progress.
    @count += 1
  end
end
generate_dwca() click to toggle source
Calls superclass method DwcaHunter::Resource#generate_dwca
# File lib/dwca_hunter/resources/opentree.rb, line 86
# Logs the two start-up messages, fills the core rows (generate_core) and the
# synonyms extension (generate_synonyms), then hands over to the superclass
# implementation of generate_dwca.
#
# Returns whatever the superclass method returns.
def generate_dwca
  announcements = ["Creating DarwinCore Archive file", "Assembling Core Data"]
  announcements.each do |announcement|
    DwcaHunter.logger_write(object_id, announcement)
  end

  generate_core
  generate_synonyms
  super
end
generate_synonyms() click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 108
# Registers the "synonyms.txt" extension in @extensions and fills it with a
# header row followed by one row per record returned by +synonyms+.
#
# Rows are appended to the extension created here, not to whichever
# extension happens to be first in @extensions.
#
# Returns the array returned by +synonyms+ (the receiver of +each+).
def generate_synonyms
  rows = [["http://rs.tdwg.org/dwc/terms/taxonId",
           "http://rs.tdwg.org/dwc/terms/scientificName",
           "http://rs.tdwg.org/dwc/terms/taxonomicStatus"]]
  @extensions << { data: rows, file_name: "synonyms.txt" }

  synonyms.each do |synonym|
    # The original referenced an undefined local here, which raised
    # NameError for any non-empty synonym list.
    # NOTE(review): assumes each synonym hash carries its taxon id under
    # :id -- confirm once +synonyms+ returns real records.
    rows << [synonym[:id], synonym[:scientificName],
             synonym[:taxonomicStatus]]
  end
end
set_vars() click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 68
# Resolves the paths of the taxonomy and synonyms files inside the "ott3.2"
# directory under @download_dir and stores them in @taxonomy and @synonyms.
#
# Returns the synonyms path (the value of the last assignment).
def set_vars
  release_dir = File.join(@download_dir, "ott3.2")
  @taxonomy = File.join(release_dir, "taxonomy.tsv")
  @synonyms = File.join(release_dir, "synonyms.tsv")
end
synonyms() click to toggle source
# File lib/dwca_hunter/resources/opentree.rb, line 104
# Supplies the synonym records that generate_synonyms iterates over.
#
# Always returns an empty array; this method does not read the @synonyms
# path assigned in set_vars.
def synonyms
  []
end