class ROF::Translators::OsfToRof

Class for managing OSF Archive data transformations. It is called after the get-from-osf task, and before the work-xlat task.

Attributes

config[R]
previously_archived_pid_finder[R]

A function responsible for finding the previously archived pid. @return [#call] @see default_previously_archived_pid_finder for interface

project[R]
ttl_data[R]

This is an array: the first element is the work and any additional elements are the contributor(s). @return [Array<Hash>] @see ttl_from_targz

Public Class Methods

call(project, config, previously_archived_pid_finder = default_previously_archived_pid_finder) click to toggle source

Convert Osf Archive tar.gz to ROF

# File lib/rof/translators/osf_to_rof.rb, line 21
# Class-level convenience entry point: instantiates a translator with the
# given arguments and immediately runs it.
#
# @param project [#fetch, nil] passed through to the constructor
# @param config [#fetch] passed through to the constructor
# @param previously_archived_pid_finder [#call] passed through to the
#   constructor; defaults to default_previously_archived_pid_finder
# @return the result of the instance-level #call
def self.call(project, config, previously_archived_pid_finder = default_previously_archived_pid_finder)
  translator = new(project, config, previously_archived_pid_finder)
  translator.call
end
default_previously_archived_pid_finder() click to toggle source

@todo Set this to be something more meaningful than an empty lambda @return [#call]

# File lib/rof/translators/osf_to_rof.rb, line 16
# Default finder used when the caller does not supply one. It accepts the two
# arguments the translator passes (archive type and OSF project identifier)
# and always returns nil.
#
# @todo Set this to be something more meaningful than an empty lambda
# @return [#call] a two-argument lambda returning nil
def self.default_previously_archived_pid_finder
  lambda { |archive_type, osf_project_identifier| }
end
new(project, config, previously_archived_pid_finder = self.class.default_previously_archived_pid_finder) click to toggle source
# File lib/rof/translators/osf_to_rof.rb, line 25
# Stores the inputs of the translation on the instance.
#
# @param project [#fetch, nil] project data; #call returns early when it is nil
# @param config [#fetch] task configuration ('package_dir' is read from it
#   when extracting ttl files)
# @param previously_archived_pid_finder [#call] invoked with the archive type
#   and the OSF project identifier; defaults to the class-level default finder
def initialize(project, config, previously_archived_pid_finder = self.class.default_previously_archived_pid_finder)
  @project = project
  @config = config
  @previously_archived_pid_finder = previously_archived_pid_finder
  # Lookup table used to translate short names into the keys found in the ttl data
  @osf_map = ROF::OsfToNDMap
end

Public Instance Methods

archive_type() click to toggle source

@api private @see github.com/ndlib/curate_nd/blob/677c05c836ff913c01dcbbfc5e5d21366b87d500/app/repository_models/osf_archive.rb#L62

# File lib/rof/translators/osf_to_rof.rb, line 42
# @api private
# Reads the package type from the project data.
#
# @return the value stored under 'package_type'
# @raise [KeyError] when the project has no 'package_type' key
def archive_type
  package_type_key = 'package_type'
  project.fetch(package_type_key)
end
call() click to toggle source
# File lib/rof/translators/osf_to_rof.rb, line 32
# Runs the translation: loads the project's ttl data and builds the archive record.
#
# NOTE(review): the nil-project branch returns an empty Hash while the normal
# path returns an Array; kept as-is because callers may depend on it.
#
# @return [Hash] an empty hash when there is no project
# @return [Array<Hash>] a one-element array holding the archive record otherwise
def call
  return {} if project.nil?
  # ttl_data must be populated before the record is built
  @ttl_data = ttl_from_targz(source_slug + '.ttl')
  [build_archive_record]
end
osf_project_identifier() click to toggle source

@api private @see github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/app/repository_models/osf_archive.rb#L106

# File lib/rof/translators/osf_to_rof.rb, line 60
# @api private
# Identifier of the OSF project this archive is based on.
#
# For a package of type 'OSF Project' this is the source slug itself;
# otherwise it is derived from the '@id' of the first 'registeredFrom' entry
# of the primary ttl document.
#
# @return [String]
def osf_project_identifier
  if project['package_type'] == 'OSF Project'
    source_slug
  else
    registered_from = ttl_data[0][@osf_map['registeredFrom']]
    osf_url_from_filename(registered_from[0]['@id'])
  end
end
source_slug() click to toggle source

@api private This is a bit of a misnomer; as used, it represents the path to the project or registration that we have ingested (e.g. osf.io/:source_slug)

It was previously named :project_identifier in this class, but that gets conflated with the underlying object's osf_project_identifier (e.g. what OSF Project was this archive originally based on)

@see github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/app/repository_models/osf_archive.rb#L96

# File lib/rof/translators/osf_to_rof.rb, line 54
# @api private
# Slug of the ingested project or registration, read from the project data.
#
# @return the value stored under 'project_identifier'
# @raise [KeyError] when the project has no 'project_identifier' key
def source_slug
  slug_key = 'project_identifier'
  project.fetch(slug_key)
end

Private Instance Methods

apply_previous_archived_version_if_applicable(rels_ext) click to toggle source

For reference to the assumed RELS-EXT see the following spec in CurateND @see github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/spec/repository_models/osf_archive_spec.rb#L97

# File lib/rof/translators/osf_to_rof.rb, line 124
# Adds 'pav:previousVersion' to the given RELS-EXT hash when a previously
# archived pid can be found.
#
# The injected finder is asked first; only when it answers nil is
# ROF::Utility.check_solr_for_previous consulted. The key is written only for
# a truthy pid.
#
# @param rels_ext [Hash] mutated in place
# @return [Hash] the same rels_ext object
def apply_previous_archived_version_if_applicable(rels_ext)
  previous_pid = previously_archived_pid_finder.call(archive_type, osf_project_identifier)
  if previous_pid.nil?
    # Nothing supplied by the finder: fall back to the SOLR lookup
    previous_pid = ROF::Utility.check_solr_for_previous(config, osf_project_identifier)
  end
  rels_ext['pav:previousVersion'] = previous_pid if previous_pid
  rels_ext
end
build_archive_record() click to toggle source

Constructs OsfArchive Record from ttl_data, data from the UI form, and task config data

# File lib/rof/translators/osf_to_rof.rb, line 135
# Assembles the OsfArchive record from the project data and the mapped
# rights, RELS-EXT and metadata sections.
#
# @return [Hash] with keys 'owner', 'type', 'rights', 'rels-ext', 'metadata'
#   and 'files' (in that order)
def build_archive_record
  {
    'owner' => project['owner'],
    'type' => 'OsfArchive',
    'rights' => map_rights,
    'rels-ext' => map_rels_ext,
    'metadata' => map_metadata,
    # The only attached file is the archive package itself
    'files' => [source_slug + '.tar.gz']
  }
end
fetch_from_ttl(ttl_file) click to toggle source

reads a ttl file and makes it a JSON-LD file that we can parse

# File lib/rof/translators/osf_to_rof.rb, line 80
# Opens a ttl file with the Turtle reader (using a copy of the OSF prefix
# list) and converts what it reads through JSON::LD::API.fromRdf.
#
# @param ttl_file [String] path of the ttl file to read
# @return the result of JSON::LD::API.fromRdf (the rest of this class indexes
#   it as an Array of Hashes)
def fetch_from_ttl(ttl_file)
  reader_options = { prefixes: ROF::OsfPrefixList.dup }
  statements = RDF::Turtle::Reader.open(ttl_file, **reader_options)
  JSON::LD::API.fromRdf(statements)
end
map_creator() click to toggle source

Sets the creator. Needs to read another ttl for the User data; only contributors with isBibliographic true are considered.

# File lib/rof/translators/osf_to_rof.rb, line 171
# Collects the full names of the bibliographic contributors.
#
# For every contributor referenced by the primary document, each ttl_data
# entry with the same '@id' is inspected; when its isBibliographic value is
# the string 'true', the user's name is looked up from that user's ttl file.
#
# @return [Array] names in contributor order
def map_creator
  contributors = ttl_data[0][@osf_map['hasContributor']]
  contributors.each_with_object([]) do |contributor, names|
    # ttl_data holds the primary document as well as the contributor entries
    ttl_data.each do |node|
      next if node['@id'] != contributor['@id']
      next unless node[@osf_map['isBibliographic']][0]['@value'] == 'true'
      names << map_user_from_ttl(node[@osf_map['hasUser']][0]['@id'])
    end
  end
end
map_metadata() click to toggle source

sets metadata

# File lib/rof/translators/osf_to_rof.rb, line 104
# Builds the descriptive metadata section of the archive record.
#
# Created date, title, description and subject come from the primary ttl
# document; the remaining values come from the project data handed to the
# translator.
#
# @return [Hash] metadata keyed by prefixed term, starting with '@context'
def map_metadata
  work = ttl_data[0]
  {
    '@context' => ROF::RdfContext.dup,
    # metadata derived from project ttl file
    # The timestamp is reduced to its calendar date, then suffixed with 'Z'
    'dc:created' => Time.iso8601(work[@osf_map['dc:created']][0]['@value']).to_date.iso8601 + 'Z',
    'dc:title' => work[@osf_map['dc:title']][0]['@value'],
    'dc:description' => work[@osf_map['dc:description']][0]['@value'],
    'dc:subject' => map_subject,
    # metadata derived from osf_projects data, passed from UI
    'dc:source' => 'https://osf.io/' + source_slug,
    # NOTE(review): 'adminstrative' looks like a misspelling of 'administrative';
    # left untouched because consumers may rely on this exact key - confirm.
    'dc:creator#adminstrative_unit' => project['administrative_unit'],
    'dc:creator#affiliation' => project['affiliation'],
    'nd:osfProjectIdentifier' => osf_project_identifier,
    'dc:creator' => map_creator,
    'dc:type' => project['package_type']
  }
end
map_rels_ext() click to toggle source

Maps RELS-EXT

# File lib/rof/translators/osf_to_rof.rb, line 96
# Builds the RELS-EXT section: a copy of the RELS-EXT context plus, when one
# can be found, the previous archived version.
#
# @return [Hash] the RELS-EXT hash built here (not the helper's return value)
def map_rels_ext
  { '@context' => ROF::RelsExtRefContext.dup }.tap do |rels_ext|
    apply_previous_archived_version_if_applicable(rels_ext)
  end
end
map_rights() click to toggle source

figures out the rights

# File lib/rof/translators/osf_to_rof.rb, line 161
# Derives the rights section from the primary document's isPublic value.
#
# @return [Hash] { 'read-groups' => ['public'] } when the value is the string
#   'true', otherwise an empty hash
def map_rights
  public_flag = ttl_data[0][@osf_map['isPublic']][0]['@value']
  return {} unless public_flag == 'true'

  { 'read-groups' => ['public'] }
end
map_subject() click to toggle source

sets subject

# File lib/rof/translators/osf_to_rof.rb, line 147
# Reads the subject from the primary ttl document.
#
# Only the first subject entry is used.
#
# @return the first subject's '@value', or '' when the document has no
#   subject key
def map_subject
  work = ttl_data[0]
  subject_key = @osf_map['dc:subject']
  work.key?(subject_key) ? work[subject_key][0]['@value'] : ''
end
map_user_from_ttl(file_subpath) click to toggle source

read user ttl file, extract User's full name

# File lib/rof/translators/osf_to_rof.rb, line 186
# Loads a user's ttl file from the package and returns the user's full name.
#
# @param file_subpath [String] reference to the user's ttl file; only its
#   basename is used to locate the file
# @return the '@value' of the first hasFullName entry of the first document
def map_user_from_ttl(file_subpath)
  user_file = File.basename(file_subpath)
  user_node = ttl_from_targz(user_file)[0]
  user_node[@osf_map['hasFullName']][0]['@value']
end
osf_url_from_filename(ttl_file) click to toggle source

make osf url from bagfile name

# File lib/rof/translators/osf_to_rof.rb, line 155
# Extracts the OSF identifier from a file reference: the last '/'-separated
# segment with its final '.extension' removed. Despite the method name, the
# result is a bare identifier, not a URL.
#
# When the last segment contains no '.', the segment itself is returned
# (previously this case produced an empty string).
#
# @param ttl_file [String] file name or path, e.g. 'data/obj/root/abc12.ttl'
# @return [String] e.g. 'abc12'
def osf_url_from_filename(ttl_file)
  basename = ttl_file.rpartition('/')[2]
  stem, dot, _extension = basename.rpartition('.')
  # rpartition puts the whole string in the last slot when no '.' is present
  dot.empty? ? basename : stem
end
ttl_from_targz(ttl_filename) click to toggle source

extracts given ttl file from JHU tar.gz package

  • assumed to live under data/obj/root

@return [Array<Hash>] the first element is the “work” and the additional elements, if any, are the contributor(s)

# File lib/rof/translators/osf_to_rof.rb, line 88
# Pulls the named ttl file out of the project's tar.gz package and parses it.
#
# The member is looked up at <source_slug>/data/obj/root/<ttl_filename>
# inside <package_dir>/<source_slug>.tar.gz, and is then read from the same
# relative path under package_dir (presumably where
# ROF::Utility.file_from_targz writes it - confirm against that helper).
#
# @param ttl_filename [String] file name of the ttl inside the package
# @return the parsed ttl data as returned by fetch_from_ttl
# @raise [KeyError] when config has no 'package_dir' key
def ttl_from_targz(ttl_filename)
  package_dir = config.fetch('package_dir')
  member_path = File.join(source_slug, 'data/obj/root', ttl_filename)
  archive_path = File.join(package_dir, source_slug + '.tar.gz')
  ROF::Utility.file_from_targz(archive_path, member_path)
  extracted_path = File.join(package_dir, member_path)
  fetch_from_ttl(extracted_path)
end