class Puree::XMLExtractor::Dataset

Dataset XML extractor.

Public Class Methods

new(xml) click to toggle source
Calls superclass method Puree::XMLExtractor::Resource::new
# File lib/puree/xml_extractor/dataset.rb, line 18
# Sets up the extractor for the given XML and selects the dataset model.
# @param xml the XML passed on unchanged to the superclass initializer
def initialize(xml)
  super(xml)
  setup_model(:dataset)
end

Public Instance Methods

available() click to toggle source

Date made available @return [Time, nil]

# File lib/puree/xml_extractor/dataset.rb, line 25
# Date made available, read from the publicationDate element.
# @return [Time, nil]
def available
  publication_date = temporal_date('publicationDate')
  Puree::Util::Date.hash_to_time(publication_date)
end
doi() click to toggle source

Digital Object Identifier @return [String, nil]

# File lib/puree/xml_extractor/dataset.rb, line 31
# Digital Object Identifier, looked up as a single value at '/doi'.
# @return [String, nil]
def doi
  doi_path = '/doi'
  xpath_query_for_single_value(doi_path)
end
files() click to toggle source

Supporting files @return [Array<Puree::Model::File>]

# File lib/puree/xml_extractor/dataset.rb, line 37
# Supporting files, one entry per distinct URL (the first occurrence wins).
# Only the title, URL and licence name of each document are extracted.
# @return [Array<Puree::Model::File>]
def files
  extracted = xpath_query('/documents/document').map do |node|
    file = Puree::Model::File.new
    file.name = node.xpath('title').text.strip
    file.url = node.xpath('url').text.strip

    licence_node = node.xpath('documentLicense').first
    if licence_node
      licence = Puree::Model::CopyrightLicense.new
      licence.name = licence_node.xpath('term/text').text.strip
      # Attach the licence only when it actually carries data.
      file.license = licence if licence.data?
    end

    file
  end

  extracted.uniq(&:url)
end
keywords() click to toggle source

@return [Array<String>]

# File lib/puree/xml_extractor/dataset.rb, line 63
# Keywords taken from the 'User-Defined Keywords' keyword group.
# @return [Array<String>]
def keywords
  group_name = 'User-Defined Keywords'
  keyword_group(group_name)
end
persons_external() click to toggle source

@return [Array<Puree::Model::EndeavourPerson>]

# File lib/puree/xml_extractor/dataset.rb, line 73
# Persons of type 'external' found under the person associations.
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_external
  association_path = '/personAssociations/personAssociation'
  persons('external', association_path)
end
persons_internal() click to toggle source

@return [Array<Puree::Model::EndeavourPerson>]

# File lib/puree/xml_extractor/dataset.rb, line 68
# Persons of type 'internal' found under the person associations.
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_internal
  association_path = '/personAssociations/personAssociation'
  persons('internal', association_path)
end
persons_other() click to toggle source

@return [Array<Puree::Model::EndeavourPerson>]

# File lib/puree/xml_extractor/dataset.rb, line 78
# Persons of type 'other' found under the person associations.
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_other
  association_path = '/personAssociations/personAssociation'
  persons('other', association_path)
end
production() click to toggle source

Date of data production @return [Puree::Model::TemporalRange, nil]

# File lib/puree/xml_extractor/dataset.rb, line 84
# Date range of data production, built from the dataProductionPeriod
# start and end dates.
# @return [Puree::Model::TemporalRange, nil]
def production
  start_path = 'dataProductionPeriod/startDate'
  end_path = 'dataProductionPeriod/endDate'
  temporal_range(start_path, end_path)
end
spatial_places() click to toggle source

@return [Array<String>]

# File lib/puree/xml_extractor/dataset.rb, line 89
# Distinct, whitespace-trimmed place names.
# @return [Array<String>]
def spatial_places
  # Data from free-form text box
  coverage_nodes = xpath_query('/geographicalCoverage')
  coverage_nodes.map { |node| node.text.strip }.uniq
end
spatial_point() click to toggle source

Spatial coverage point @return [Puree::Model::SpatialPoint, nil]

# File lib/puree/xml_extractor/dataset.rb, line 101
# Spatial coverage point, parsed from a "latitude,longitude" text value.
#
# Returns nil when there is no point element, and also when the text does
# not hold two non-empty comma-separated components. Without that check a
# blank or malformed value would silently become a 0.0/0.0 coordinate.
# @return [Puree::Model::SpatialPoint, nil]
def spatial_point
  xpath_result = xpath_query '/geoLocation/point'
  return nil if xpath_result.empty?

  latitude, longitude = xpath_result.text.strip.split(',').map(&:strip)
  # nil.to_f and ''.to_f are both 0.0, so a missing component has to be
  # rejected explicitly rather than converted.
  return nil if [latitude, longitude].any? { |part| part.nil? || part.empty? }

  point = Puree::Model::SpatialPoint.new
  point.latitude = latitude.to_f
  point.longitude = longitude.to_f
  point
end
temporal() click to toggle source

Temporal coverage @return [Puree::Model::TemporalRange, nil]

# File lib/puree/xml_extractor/dataset.rb, line 114
# Temporal coverage, built from the temporalCoveragePeriod start and end dates.
# @return [Puree::Model::TemporalRange, nil]
def temporal
  start_path = 'temporalCoveragePeriod/startDate'
  end_path = 'temporalCoveragePeriod/endDate'
  temporal_range(start_path, end_path)
end

Private Instance Methods

combine_metadata() click to toggle source
# File lib/puree/xml_extractor/dataset.rb, line 150
# Copies every dataset-level extractor value onto @model, after the
# superclass has run its own combine_metadata, and returns the model.
def combine_metadata
  super

  # Each name is both an extractor method on self and a writer on @model.
  # They are processed in exactly this order.
  fields = %i[
    available description doi files keywords organisational_units owner
    persons_internal persons_external persons_other production
    research_outputs publisher spatial_places spatial_point temporal
    title workflow
  ]
  fields.each do |field|
    @model.public_send("#{field}=", __send__(field))
  end

  @model
end
temporal_date(date_path) click to toggle source

Temporal coverage date @return [Hash]

# File lib/puree/xml_extractor/dataset.rb, line 136
# Reads the year, month and day child values found at the given path and
# passes them, as a string-keyed hash, through Puree::Util::Date.normalise.
# @param date_path [String] path relative to the root, without a leading slash
# @return [Hash]
def temporal_date(date_path)
  date_node = xpath_query("/#{date_path}")
  parts = %w[year month day].each_with_object({}) do |unit, collected|
    collected[unit] = date_node.xpath(unit).text.strip
  end
  Puree::Util::Date.normalise(parts)
end
temporal_range(start_path, end_path) click to toggle source

Temporal range @return [Puree::Model::TemporalRange, nil]

# File lib/puree/xml_extractor/dataset.rb, line 122
# Builds a temporal range from the dates found at the two paths.
# Only the boundaries that resolve to a time are set on the range.
# @param start_path [String] path of the start date
# @param end_path [String] path of the end date
# @return [Puree::Model::TemporalRange, nil] nil when neither date resolves
def temporal_range(start_path, end_path)
  starts_at = Puree::Util::Date.hash_to_time(temporal_date(start_path))
  ends_at = Puree::Util::Date.hash_to_time(temporal_date(end_path))
  return nil unless starts_at || ends_at

  Puree::Model::TemporalRange.new.tap do |span|
    span.start = starts_at if starts_at
    span.end = ends_at if ends_at
  end
end
xpath_root() click to toggle source
# File lib/puree/xml_extractor/dataset.rb, line 146
# Returns the fixed string '/dataSet'.
# NOTE(review): presumably the root XPath that this extractor's relative
# queries are resolved against — confirm in the superclass.
# @return [String]
def xpath_root
  '/dataSet'
end