class Dataverse::Dataset

Constants

MD_TYPES
MD_TYPES_JSON
MD_TYPES_XML

Attributes

id[R]

Public Class Methods

create(data:, dataverse:) click to toggle source
# File lib/dataverse/dataset.rb, line 19
# Creates a new dataset inside a dataverse collection.
#
# Forwards the arguments, in (dataverse, data) order, to +new_dataset+ and
# returns whatever that helper returns.
# NOTE(review): +new_dataset+ is not visible in this part of the file; the
# private class method +parse+ has a matching signature - confirm that
# +new_dataset+ exists (or is an alias of +parse+).
#
# @param data [Object] dataset description, passed through unchanged
# @param dataverse [Object] target collection, passed through unchanged
def self.create(data:, dataverse:)
  target, payload = dataverse, data
  new_dataset(target, payload)
end
id(id) click to toggle source
# File lib/dataverse/dataset.rb, line 10
# Builds a Dataset for the given dataset id.
#
# @param id [Object] identifier handed straight to +Dataset.new+
# @return [Dataset] the newly constructed instance
def self.id(id)
  Dataset.new(id)
end
import(data:, dataverse:, pid:, publish: false, ddi: false) click to toggle source
# File lib/dataverse/dataset.rb, line 23
# Imports a dataset under an existing persistent identifier.
#
# Forwards to +new_dataset+ with +pid+ passed as the +import:+ option and
# returns that helper's result.
#
# @param data [Object] dataset description, passed through unchanged
# @param dataverse [Object] target collection, passed through unchanged
# @param pid [Object] persistent identifier to import under
# @param publish [Boolean] forwarded as the +publish:+ option
# @param ddi [Boolean] forwarded as the +ddi:+ option
def self.import(data:, dataverse:, pid:, publish: false, ddi: false)
  options = { import: pid, publish: publish, ddi: ddi }
  new_dataset(dataverse, data, **options)
end
new(id) click to toggle source
# File lib/dataverse/dataset.rb, line 193
# Stores the dataset id, then loads the dataset through +get_data+ and
# hands the result to +init+.
#
# The id must be assigned first because +get_data+ builds its URL from it.
#
# @param id [Object] dataset identifier
def initialize(id)
  @id = id
  payload = get_data
  init(payload)
end
pid(pid) click to toggle source
# File lib/dataverse/dataset.rb, line 14
# Looks a dataset up by persistent identifier and builds a Dataset from the
# 'id' field of the API response.
#
# @param pid [Object] persistent identifier sent as the 'persistentId' parameter
# @return [Dataset]
def self.pid(pid)
  lookup = { 'persistentId' => pid }
  response = api_call('datasets/:persistentId', params: lookup)
  Dataset.new(response['id'])
end

Private Class Methods

parse(dataverse, data, import: nil, publish: false, ddi: false) click to toggle source
# File lib/dataverse/dataset.rb, line 334
# Posts dataset data to a dataverse collection and returns the resulting
# Dataset.
#
# Accepted forms of +data+:
# * Hash   - serialized with +to_json+
# * String - the name of an existing file (opened for reading), otherwise
#            the content itself; the content must parse as JSON unless
#            +ddi+ is set
# * File / StringIO - used as-is
# A File (whether passed in or opened here) is closed before returning.
#
# @param dataverse [Object] collection id/alias; a +Dataverse+ instance is
#   replaced by its +id+
# @param data [Hash, String, File, StringIO] see above
# @param import [Object, nil] when set, posts to the ':import' endpoint and
#   sends this value as the +pid+ parameter
# @param publish [Boolean] sent as release 'yes' / 'no'
# @param ddi [Boolean] send the body as XML and skip JSON validation
# @return [Object] result of +Dataset.id+ for the 'id' of the API response
# @raise [Error] when +data+ cannot be turned into a readable body
def self.parse(dataverse, data, import: nil, publish: false, ddi: false)
  dataverse = dataverse.id if dataverse.is_a?(Dataverse)

  data = StringIO.new(data.to_json) if data.is_a?(Hash)

  if data.is_a?(String)
    begin
      if File.exist?(data)
        data = File.open(data, 'r')
      elsif ddi || JSON.parse(data)
        data = StringIO.new(data)
      end
    rescue JSON::ParserError, SystemCallError
      # SystemCallError covers the Errno::* failures File.open can raise
      # (e.g. permission denied); both cases fall through to the Error below.
      data = nil
    end
  end

  unless data.is_a?(File) || data.is_a?(StringIO)
    raise Error.new("Data could not be parsed. Should be a Hash, filename or JSON string.")
  end

  url = "dataverses/#{dataverse}/datasets"
  url += '/:import' if import

  params = { release: publish ? 'yes' : 'no' }
  params[:pid] = import if import

  headers = { content_type: ddi ? :xml : :json }

  result = api_call(url, method: :post, headers: headers, body: data, params: params)

  Dataset.id(result['id'])
ensure
  data.close if data.is_a?(File)
end

Public Instance Methods

author(version: :latest) click to toggle source
# File lib/dataverse/dataset.rb, line 95
# Returns the 'authorName' of the first entry in the 'author' metadata field.
#
# @param version [Object] version selector passed to +metadata+
# @raise [KeyError] when 'author' or 'authorName' is absent
def author(version: :latest)
  authors = metadata(version: version).fetch('author')
  authors.first.fetch('authorName')
end
call(url, **args) click to toggle source
# File lib/dataverse/dataset.rb, line 55
# Performs an API call relative to this dataset: the URL is prefixed with
# "datasets/<id>/" and all keyword arguments are forwarded to +api_call+.
#
# @param url [String] path below the dataset endpoint
# @return [Object] whatever +api_call+ returns
def call(url, **args)
  endpoint = "datasets/#{id}/#{url}"
  api_call(endpoint, **args)
end
created(version: :latest) click to toggle source
# File lib/dataverse/dataset.rb, line 103
# Returns the 'createTime' of the selected version as a local Time.
#
# @param version [Object] version selector passed to +version_data+
# @return [Time]
# @raise [KeyError] when the version data has no 'createTime'
def created(version: :latest)
  stamp = version_data(version).fetch('createTime')
  Time.parse(stamp).getlocal
end
delete() click to toggle source
# File lib/dataverse/dataset.rb, line 27
# Deletes the draft version of this dataset.
#
# After the remote call the draft entry is dropped from all local caches;
# when no published version remains the object is reset through +init({})+.
#
# @return [Object] the 'message' entry of the API response
# @raise [Error] when there is no draft version
def delete
  raise Error.new('Can only delete draft version') unless draft_version

  # Populates the memoized version list (presumably so that its draft entry
  # can be removed below).
  versions
  response = call('versions/:draft', method: :delete)
  [@version_data, @metadata, @files].each { |cache| cache.delete(:draft) }
  @version_numbers&.delete(:draft)
  init({}) if published_versions.empty?
  response['message']
end
download(filename = 'dataverse_files.zip', version: nil) click to toggle source
# File lib/dataverse/dataset.rb, line 165
# Downloads the dataset files into +filename+.
#
# The response body is streamed chunk by chunk into the file.
#
# @param filename [String] destination path (created or truncated)
# @param version [Object, nil] optional version selector; when given it is
#   translated with +version_string+ and appended to the URL
# @return [Integer, false] number of bytes written, or +false+ when the
#   response check raises Net::HTTPServerException
# @raise [Error] when +version_string+ yields nothing for +version+
def download(filename = 'dataverse_files.zip', version: nil)
  if version
    v = version_string(version)
    raise Error.new("Version '#{version}' does not exist") unless v
    version = v
  end
  # Binary mode: the payload is an archive and must be written byte for byte
  # (text mode translates line endings on some platforms).
  File.open(filename, 'wb') do |f|
    size = 0
    block = proc do |response|
      # NOTE(review): assumes +response+ is a Net::HTTPResponse, whose
      # #value raises for non-success statuses - confirm against api_call.
      response.value
      response.read_body do |chunk|
        size += chunk.bytesize
        f.write chunk
      end
    rescue Net::HTTPServerException
      # +return+ inside a proc returns from the enclosing method, so
      # #download itself answers false here.
      return false
    end
    url = 'access/dataset/:persistentId'
    url += "/versions/#{version}" if version
    api_call(url, params: { persistentId: pid }, block: block)
    size
  end
end
download_size(version: :latest) click to toggle source
# File lib/dataverse/dataset.rb, line 160
# Asks the API for the download size of a version and returns it as an
# Integer.
#
# The number is taken from the first run of digits/commas in the 'message'
# field of the response; thousands separators are stripped.
#
# @param version [Object] version selector passed to +version_string+
# @return [Integer]
def download_size(version: :latest)
  response = call("versions/#{version_string(version)}/downloadsize")
  digits = response['message'][/[,\d]+/]
  digits.delete(',').to_i
end
draft_version() click to toggle source
# File lib/dataverse/dataset.rb, line 76
# Returns +:draft+ when draft data is present in the local version cache,
# +nil+ otherwise.
#
# @return [Symbol, nil]
def draft_version
  @version_data.key?(:draft) ? :draft : nil
end
export_metadata(md_type) click to toggle source
# File lib/dataverse/dataset.rb, line 120
# Exports the dataset metadata in the requested format.
#
# 'rdm' and 'raw' are answered locally through +rdm_data+ / +raw_data+; the
# types listed in MD_TYPES_XML / MD_TYPES_JSON go to the 'datasets/export'
# endpoint and are parsed as XML or JSON respectively.
#
# @param md_type [String, Symbol] metadata format name
# @return [Object, nil] exported metadata, or +nil+ when +version(:published)+
#   is falsy
# @raise [Error] for an unknown format name
def export_metadata(md_type)
  return nil unless version(:published)

  response_format =
    case md_type.to_s
    when *MD_TYPES_XML then :xml
    when *MD_TYPES_JSON then :json
    when 'rdm' then return rdm_data
    when 'raw' then return raw_data
    else raise Error.new("Unknown metadata format: '#{md_type}'")
    end

  query = { exporter: md_type, persistentId: pid }
  api_call('datasets/export', params: query, format: response_format)
end
files(version: :latest) click to toggle source
# File lib/dataverse/dataset.rb, line 156
# Returns the cached file list of the selected version, or an empty Array
# when nothing is stored for it.
#
# @param version [Object] version selector passed to +resolve_version+
# @return [Array]
def files(version: :latest)
  key = resolve_version(version)
  @files[key] || []
end
metadata(version: :latest) click to toggle source
# File lib/dataverse/dataset.rb, line 152
# Returns the cached metadata of the selected version, or an empty Hash
# when nothing is stored for it.
#
# @param version [Object] version selector passed to +resolve_version+
# @return [Hash]
def metadata(version: :latest)
  key = resolve_version(version)
  @metadata[key] || {}
end
metadata_fields(version: :latest) click to toggle source
# File lib/dataverse/dataset.rb, line 112
# Lists the field names present in the metadata of the selected version.
#
# @param version [Object] version selector passed to +metadata+
# @return [Array] the metadata keys, or an empty Array when there are none
def metadata_fields(version: :latest)
  names = metadata(version: version)&.keys
  names || []
end
pid(version: :latest) click to toggle source
# File lib/dataverse/dataset.rb, line 59
# Returns the 'datasetPersistentId' recorded in the selected version's data.
#
# @param version [Object] version selector passed to +version_data+
# @raise [KeyError] when the version data has no 'datasetPersistentId'
def pid(version: :latest)
  details = version_data(version)
  details.fetch('datasetPersistentId')
end
publish(major: true) click to toggle source
# File lib/dataverse/dataset.rb, line 47
# Requests publication of the dataset as a major or minor release.
#
# @param major [Boolean] +true+ sends type 'major', otherwise 'minor'
# @return [String, nil] a status sentence for result 200 (published) or 202
#   (waiting for review); +nil+ for any other result
def publish(major: true)
  release_type = major ? 'major' : 'minor'
  status = call('actions/:publish', method: :post, params: { type: release_type }, format: :status)
  if status == 200
    "Dataset #{pid} published"
  elsif status == 202
    "Dataset #{pid} waiting for review"
  end
end
published(version: :published) click to toggle source
# File lib/dataverse/dataset.rb, line 107
# Returns the 'releaseTime' of the selected version as a local Time.
#
# @param version [Object] version selector passed to +version_data+
# @return [Time, nil] +nil+ when the version data has no 'releaseTime'
def published(version: :published)
  details = version_data(version)
  return nil unless details.has_key?('releaseTime')

  Time.parse(details.fetch('releaseTime')).getlocal
end
published_versions() click to toggle source
# File lib/dataverse/dataset.rb, line 80
# Lists the numbers of all versions whose 'versionState' is 'RELEASED'.
#
# The list is fetched from the 'versions' endpoint once and memoized.
# Each number is the Float built from "<versionNumber>.<versionMinorNumber>".
#
# @return [Array<Float>]
def published_versions
  @published_versions ||= call('versions')
    .select { |entry| entry['versionState'] == 'RELEASED' }
    .map { |entry| "#{entry['versionNumber']}.#{entry['versionMinorNumber']}".to_f }
end
raw_data(version: :latest, with_files: false) click to toggle source
# File lib/dataverse/dataset.rb, line 145
# Assembles the dataset description for one version.
#
# Merges +api_data+ with the version data, then adds the metadata blocks
# (and optionally the file list) fetched from the API.
#
# @param version [Object] version selector
# @param with_files [Boolean] also fetch and include the 'files' entry
# @return [Hash] the merged data wrapped under the 'datasetVersion' key
def raw_data(version: :latest, with_files: false)
  result = api_data.merge(version_data(resolve_version(version)))
  # No leading slash: #call already joins "datasets/<id>/" and the path.
  version_path = "versions/#{version_string(version)}"
  result['metadataBlocks'] = call("#{version_path}/metadata")
  result['files'] = call("#{version_path}/files") if with_files
  { 'datasetVersion' => result }
end
rdm_data(version: :published) click to toggle source
# File lib/dataverse/dataset.rb, line 137
# Builds one Hash from +api_data+, the version data, and the packed
# 'metadata' and 'files' of the selected version.
#
# @param version [Object] version selector
# @return [Hash, nil] +nil+ when the version cannot be resolved
def rdm_data(version: :published)
  return nil unless version(version)

  combined = api_data.merge(version_data(version))
  combined['metadata'] = metadata(version: version)
  combined['files'] = files(version: version)
  combined
end
reject(reason) click to toggle source
# File lib/dataverse/dataset.rb, line 43
# Posts +reason+ as the request body to the dataset's 'returnToAuthor'
# endpoint.
#
# @param reason [Object] request body
# @return [Object] whatever +call+ returns
def reject(reason)
  # The HTTP verb is the symbol :post, as in the other POST calls here;
  # a bare `post` would be looked up as a method or local variable.
  call('returnToAuthor', method: :post, body: reason)
end
size() click to toggle source
# File lib/dataverse/dataset.rb, line 63
# Asks the API for the storage size of the dataset (cached value allowed)
# and returns it as an Integer.
#
# The number is taken from the first run of digits/commas in the 'message'
# field of the response; thousands separators are stripped.
#
# @return [Integer]
def size
  response = call('storagesize', params: { includeCached: 'true' })
  digits = response['message'][/[,\d]+/]
  digits.delete(',').to_i
end
submit() click to toggle source
# File lib/dataverse/dataset.rb, line 39
# Posts to the dataset's 'submitForReview' endpoint.
#
# @return [Object] whatever +call+ returns
def submit
  # The HTTP verb is the symbol :post, as in the other POST calls here;
  # a bare `post` would be looked up as a method or local variable.
  call('submitForReview', method: :post)
end
title(version: :latest) click to toggle source
# File lib/dataverse/dataset.rb, line 91
# Returns the 'title' metadata field of the selected version.
#
# @param version [Object] version selector passed to +metadata+
# @raise [KeyError] when the metadata has no 'title'
def title(version: :latest)
  fields = metadata(version: version)
  fields.fetch('title')
end
updated(version: :latest) click to toggle source
# File lib/dataverse/dataset.rb, line 99
# Returns the 'lastUpdateTime' of the selected version as a local Time.
#
# @param version [Object] version selector passed to +version_data+
# @return [Time]
# @raise [KeyError] when the version data has no 'lastUpdateTime'
def updated(version: :latest)
  stamp = version_data(version).fetch('lastUpdateTime')
  Time.parse(stamp).getlocal
end
version(version = :latest) click to toggle source
# File lib/dataverse/dataset.rb, line 87
# Resolves a version selector without raising: delegates to
# +resolve_version+ with +raise_if_not_found: false+.
#
# @param version [Object] version selector
# @return [Object] whatever +resolve_version+ returns
def version(version = :latest)
  resolve_version(version, raise_if_not_found: false)
end
versions() click to toggle source
# File lib/dataverse/dataset.rb, line 68
# Lists every version selector known for this dataset (memoized).
#
# Order: :latest, :published (only when at least one published version
# exists), :draft (only when a draft exists), then the published numbers.
#
# @return [Array]
def versions
  @version_numbers ||= begin
    known = %i[latest published]
    draft = draft_version
    known << draft unless draft.nil?
    known.concat(published_versions)
    known.delete(:published) if published_versions.empty?
    known
  end
end

Protected Instance Methods

get_data() click to toggle source
# File lib/dataverse/dataset.rb, line 207
# Fetches this dataset from the API ("datasets/<id>").
#
# @return [Object] whatever +api_call+ returns
def get_data
  endpoint = "datasets/#{id}"
  api_call(endpoint)
end
init(data) click to toggle source
Calls superclass method Dataverse::Base#init
# File lib/dataverse/dataset.rb, line 198
# Resets all per-version caches and memoized lists, then passes the
# processed data on to the superclass +init+.
#
# The caches must be emptied first: +process_data+ runs before +super+ is
# invoked and stores the latest version into them.
#
# @param data [Hash, nil] dataset data to process
def init(data)
  @version_data, @metadata, @files = {}, {}, {}
  @version_numbers = @published_versions = nil
  super(process_data(data))
end
resolve_version(version, raise_if_not_found: true) click to toggle source
# File lib/dataverse/dataset.rb, line 211
# Translates a version selector into the key used by the local caches and
# makes sure the data for that version is loaded.
#
# @param version [Symbol, String, Numeric] :draft / :latest / :published,
#   one of their string spellings, or a version number
# @param raise_if_not_found [Boolean] raise instead of returning +nil+ when
#   the version is unknown
# @return [Symbol, Float, nil] the resolved cache key, or +nil+ when the
#   version is unknown and +raise_if_not_found+ is false
# @raise [VersionError] carrying the selector as originally passed in
def resolve_version(version, raise_if_not_found: true)
  # Keep the caller's original argument for the error message.
  _version = version

  # Step 1: normalize string spellings to symbols and numbers to Float.
  # Note that any other String also lands in the Numeric/String branch, and
  # String#to_f yields 0.0 for non-numeric text.
  version = case version
  when ':draft', 'draft'
    :draft
  when ':latest', 'latest'
    :latest
  when ':published', 'published', ':latest-published', 'latest-published'
    :published
  when Numeric, String
    version.to_f
  else
    version
  end

  # Step 2: map the relative selectors onto a concrete version. Both
  # results can be nil when there is no draft / no published version.
  case version
  when :latest
    version = draft_version || published_versions.max
  when :published
    version = published_versions.max
  end

  # Step 3: when the version is not cached yet, check it against the list
  # of known versions and fetch its data from the API.
  unless @version_data.keys.include?(version)
    version = versions.find {|x| x == version}
    raise VersionError.new(_version) if version.nil? && raise_if_not_found
    return nil unless version
    data = call("versions/#{version}")
    process_version_data(data)
  end

  version
end
version_data(version) click to toggle source
# File lib/dataverse/dataset.rb, line 257
# Returns a copy of the stored data of the selected version in which the
# 'id' key is renamed to 'versionId'.
#
# @param version [Object] version selector passed to +resolve_version+
# @return [Hash]
def version_data(version)
  stored = @version_data[resolve_version(version)]
  stored.transform_keys { |key| key == 'id' ? 'versionId' : key }
end
version_string(version) click to toggle source
# File lib/dataverse/dataset.rb, line 245
# Renders a version selector the way it is written in API URLs:
# Symbols get a leading colon (":draft"), numbers are stringified, anything
# else (including +nil+) is returned unchanged.
#
# @param version [Object] version selector passed to +resolve_version+
# @return [String, Object]
def version_string(version)
  resolved = resolve_version(version)
  return ":#{resolved}" if resolved.is_a?(Symbol)
  return resolved.to_s if resolved.is_a?(Numeric)

  resolved
end

Private Instance Methods

compound_to_value(data) click to toggle source
# File lib/dataverse/dataset.rb, line 325
# Flattens a compound metadata value.
#
# An Array is converted element by element. A Hash of sub-fields becomes a
# Hash keyed by each sub-field's 'typeName', holding the sub-field's value
# as produced by +field_to_value+.
#
# @param data [Array, Hash]
# @return [Array, Hash]
def compound_to_value(data)
  return data.map { |item| compound_to_value(item) } if data.is_a?(Array)

  data.values.each_with_object({}) do |subfield, collected|
    collected[subfield['typeName']] = field_to_value(subfield)
  end
end
field_to_value(field) click to toggle source
# File lib/dataverse/dataset.rb, line 312
# Extracts the plain value of a metadata field according to its 'typeClass':
# 'primitive' and 'controlledVocabulary' fields return their 'value' as-is,
# 'compound' fields are flattened through +compound_to_value+.
#
# @param field [Hash] metadata field with 'typeClass' and 'value'
# @raise [Error] for any other typeClass
def field_to_value(field)
  type_class = field['typeClass']
  case type_class
  when 'primitive', 'controlledVocabulary'
    field['value']
  when 'compound'
    compound_to_value(field['value'])
  else
    raise Error.new("Unsupported typeClass: '#{type_class}'")
  end
end
get_version_number(data) click to toggle source
# File lib/dataverse/dataset.rb, line 278
# Derives the cache key for a version from its data.
#
# @param data [Hash] version data with 'versionState' and, for released
#   versions, 'versionNumber' / 'versionMinorNumber'
# @return [Symbol, Float] +:draft+ for a draft, otherwise the Float built
#   from "<versionNumber>.<versionMinorNumber>"
# @raise [Error] for any other version state
# NOTE(review): because the key is a Float, minor number 10 yields the same
# key as minor number 1 ("1.10".to_f == 1.1).
def get_version_number(data)
  state = data['versionState']
  case state
  when 'DRAFT'
    :draft
  when 'RELEASED'
    "#{data['versionNumber']}.#{data['versionMinorNumber']}".to_f
  else
    raise Error.new("Unsupported version state: '#{state}'")
  end
end
pack_files(files) click to toggle source
# File lib/dataverse/dataset.rb, line 305
# Flattens each file entry by merging its nested 'dataFile' Hash into the
# entry itself.
#
# The 'dataFile' key is removed from the entries passed in (they are
# modified in place); the returned Array holds new, merged Hashes.
#
# @param files [Array<Hash>] file entries, each with a 'dataFile' Hash
# @return [Array<Hash>]
def pack_files(files)
  files.each_with_object([]) do |entry, packed|
    details = entry.delete('dataFile')
    packed << entry.merge(details)
  end
end
pack_metadata(metadata) click to toggle source
# File lib/dataverse/dataset.rb, line 295
# Collapses all metadata blocks into one flat Hash that maps each field's
# 'typeName' to its value as produced by +field_to_value+.
#
# When the same typeName occurs more than once, the last one wins.
#
# @param metadata [Hash] metadata blocks, each with a 'fields' Array
# @return [Hash]
def pack_metadata(metadata)
  metadata.each_value.with_object({}) do |block, packed|
    block['fields'].each do |field|
      packed[field['typeName']] = field_to_value(field)
    end
  end
end
process_data(data) click to toggle source
# File lib/dataverse/dataset.rb, line 263
# Splits the 'latestVersion' entry off the dataset data, processes it as
# version data, and returns what remains.
#
# The Hash passed in is modified in place and is also the return value.
#
# @param data [Hash, nil] dataset data as returned by the API
# @return [Hash] +data+ without 'latestVersion'; an empty Hash for nil/empty
#   input
def process_data(data)
  return {} if data.nil? || data.empty?

  process_version_data(data.delete('latestVersion'))
  data
end
process_version_data(data) click to toggle source
# File lib/dataverse/dataset.rb, line 270
# Splits raw version data into its packed metadata, packed file list and
# remaining attributes, and stores all three under the version's key.
#
# 'metadataBlocks' and 'files' are removed from +data+ in place.
#
# @param data [Hash] version data as returned by the API
# @return [Symbol, Float] the key from +get_version_number+
def process_version_data(data)
  blocks = data.delete('metadataBlocks')
  packed_metadata = pack_metadata(blocks)
  file_entries = data.delete('files')
  packed_files = pack_files(file_entries)
  number = get_version_number(data)
  store_data(number, data, packed_metadata, packed_files)
  number
end
store_data(version, data, metadata, files) click to toggle source
# File lib/dataverse/dataset.rb, line 289
# Freezes the three pieces of a version and stores each in its cache under
# the given version key.
#
# @param version [Symbol, Float] cache key
# @param data [Hash] version attributes
# @param metadata [Hash] packed metadata
# @param files [Array] packed file list
# @return [Array] the frozen +files+
def store_data(version, data, metadata, files)
  @version_data.store(version, data.freeze)
  @metadata.store(version, metadata.freeze)
  @files.store(version, files.freeze)
end