class ROF::Translators::OsfToRof
Class for managing OSF Archive data transformations. It is called after the get-from-osf task, and before the work-xlat task.
Attributes
A function responsible for finding the previously archived pid. @return [#call] @see default_previously_archived_pid_finder for interface
This is an array; the additional elements are the contributor(s). @return [Array<Hash>] @see ttl_from_targz
Public Class Methods
Convert Osf Archive tar.gz to ROF
# File lib/rof/translators/osf_to_rof.rb, line 21
# Class-level convenience: instantiate a translator with the given arguments
# and run it straight away.
#
# @param project the project description handed to the new instance
# @param config the task configuration handed to the new instance
# @param previously_archived_pid_finder [#call] defaults to
#   default_previously_archived_pid_finder
# @return whatever the instance-level #call returns
def self.call(project, config, previously_archived_pid_finder = default_previously_archived_pid_finder)
  translator = new(project, config, previously_archived_pid_finder)
  translator.call
end
@todo Set this to be something more meaningful than an empty lambda @return [#call]
# File lib/rof/translators/osf_to_rof.rb, line 16
# Supplies the fallback finder: a strict two-argument lambda with an empty
# body, so invoking it always evaluates to nil.
#
# @return [Proc] lambda accepting (archive_type, osf_project_identifier)
def self.default_previously_archived_pid_finder
  lambda { |_archive_type, _osf_project_identifier| }
end
# File lib/rof/translators/osf_to_rof.rb, line 25
# Captures the collaborators on the instance and points @osf_map at the
# ROF::OsfToNDMap constant.
#
# @param project the project description (read elsewhere via #fetch and #[])
# @param config the task configuration
# @param previously_archived_pid_finder [#call] defaults to the class-level
#   default_previously_archived_pid_finder
def initialize(project, config, previously_archived_pid_finder = self.class.default_previously_archived_pid_finder)
  @project = project
  @config = config
  @osf_map = ROF::OsfToNDMap
  @previously_archived_pid_finder = previously_archived_pid_finder
end
Public Instance Methods
@api private @see github.com/ndlib/curate_nd/blob/677c05c836ff913c01dcbbfc5e5d21366b87d500/app/repository_models/osf_archive.rb#L62
# File lib/rof/translators/osf_to_rof.rb, line 42
# Looks up the project's 'package_type' entry.
#
# @return the value stored under 'package_type'
# @raise [KeyError] when the project is a Hash without that key (Hash#fetch)
def archive_type
  package_type_key = 'package_type'
  project.fetch(package_type_key)
end
# File lib/rof/translators/osf_to_rof.rb, line 32
# Runs the translation: loads "<source_slug>.ttl" into @ttl_data and wraps the
# resulting archive record in a one-element array.
#
# @return [Hash] an empty Hash when project is nil
# @return [Array] a single-element Array holding the archive record otherwise
def call
  return {} if project.nil?

  @ttl_data = ttl_from_targz(source_slug + '.ttl')
  [build_archive_record]
end
@api private @see github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/app/repository_models/osf_archive.rb#L106
# File lib/rof/translators/osf_to_rof.rb, line 60
# Resolves the identifier of the underlying OSF project.
#
# When the package type is 'OSF Project' the source slug is the answer.
# Otherwise the '@id' of the first 'registeredFrom' entry on the first
# ttl_data element is converted with osf_url_from_filename.
#
# @return the source slug or the identifier derived from registeredFrom
def osf_project_identifier
  if project['package_type'] == 'OSF Project'
    source_slug
  else
    registered_from = ttl_data[0][@osf_map['registeredFrom']]
    osf_url_from_filename(registered_from[0]['@id'])
  end
end
@api private This is a bit of a misnomer; As used it represents the path to the project or registration that we have ingested (e.g. osf.io/:source_slug)
It was previously named :project_identifier in this class, but that gets conflated with the underlying object's osf_project_identifier
(e.g. what OSF Project was this archive originally based on)
# File lib/rof/translators/osf_to_rof.rb, line 54
# Looks up the project's 'project_identifier' entry.
#
# @return the value stored under 'project_identifier'
# @raise [KeyError] when the project is a Hash without that key (Hash#fetch)
def source_slug
  identifier_key = 'project_identifier'
  project.fetch(identifier_key)
end
Private Instance Methods
For reference to the assumed RELS-EXT see the following spec in CurateND @see github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/spec/repository_models/osf_archive_spec.rb#L97
# File lib/rof/translators/osf_to_rof.rb, line 124
# Adds 'pav:previousVersion' to the given RELS-EXT hash when a previous
# archive pid can be found.
#
# The injected finder is asked first. Only when it answers nil (not merely
# falsy) is ROF::Utility.check_solr_for_previous consulted.
#
# @param rels_ext [Hash] mutated in place
# @return [Hash] the same rels_ext object
def apply_previous_archived_version_if_applicable(rels_ext)
  previous_pid = previously_archived_pid_finder.call(archive_type, osf_project_identifier)
  if previous_pid.nil?
    # Nothing supplied by the finder: fall back to the SOLR lookup.
    previous_pid = ROF::Utility.check_solr_for_previous(config, osf_project_identifier)
  end
  return rels_ext unless previous_pid

  rels_ext['pav:previousVersion'] = previous_pid
  rels_ext
end
Constructs the OsfArchive record from ttl_data, data from the UI form, and task config data.
# File lib/rof/translators/osf_to_rof.rb, line 135
# Assembles the OsfArchive record from the project owner, the mapped rights,
# RELS-EXT and metadata, and the "<source_slug>.tar.gz" file name.
#
# @return [Hash] keys: 'owner', 'type', 'rights', 'rels-ext', 'metadata', 'files'
def build_archive_record
  {
    'owner' => project['owner'],
    'type' => 'OsfArchive',
    'rights' => map_rights,
    'rels-ext' => map_rels_ext,
    'metadata' => map_metadata,
    'files' => [source_slug + '.tar.gz']
  }
end
reads a ttl file and makes it a JSON-LD file that we can parse
# File lib/rof/translators/osf_to_rof.rb, line 80
# Opens the Turtle file with a copy of ROF::OsfPrefixList as prefixes and
# converts what the reader yields to JSON-LD via JSON::LD::API.fromRdf.
#
# @param ttl_file path of the Turtle file to read
# @return the value produced by JSON::LD::API.fromRdf
def fetch_from_ttl(ttl_file)
  turtle_reader = RDF::Turtle::Reader.open(ttl_file, prefixes: ROF::OsfPrefixList.dup)
  JSON::LD::API.fromRdf(turtle_reader)
end
Sets the creator; needs to read another ttl for the User data. Only contributors with isBibliographic true are considered.
# File lib/rof/translators/osf_to_rof.rb, line 171
# Collects the creators of the work.
#
# For every contributor reference on the first ttl_data element (in listed
# order), the ttl_data nodes sharing that '@id' are inspected; those whose
# isBibliographic value is the string 'true' are resolved through
# map_user_from_ttl.
#
# @return [Array] one map_user_from_ttl result per bibliographic contributor;
#   empty when the work lists no contributors
def map_creator
  # A work without a hasContributor entry has no creators; Array() turns the
  # missing (nil) value into [] instead of failing on nil.each.
  contributors = Array(ttl_data[0][@osf_map['hasContributor']])

  # Index the nodes once so each contributor is a hash lookup rather than a
  # full rescan of ttl_data. group_by keeps the nodes in document order.
  nodes_by_id = ttl_data.group_by { |item| item['@id'] }

  contributors.flat_map do |contributor|
    nodes_by_id.fetch(contributor['@id'], [])
               .select { |item| item[@osf_map['isBibliographic']][0]['@value'] == 'true' }
               .map { |item| map_user_from_ttl(item[@osf_map['hasUser']][0]['@id']) }
  end
end
sets metadata
# File lib/rof/translators/osf_to_rof.rb, line 104
# Builds the descriptive metadata hash.
#
# Created date, title and description are read from the first ttl_data
# element; subject and creator come from map_subject / map_creator; the rest
# is taken from the project hash and the source slug.
# 'dc:created' is the ISO 8601 date of the parsed timestamp with 'Z' appended.
#
# @return [Hash] metadata keyed by prefixed term, starting with '@context'
def map_metadata
  work = ttl_data[0]
  # First literal value recorded on the work for a mapped term.
  first_value = ->(term) { work[@osf_map[term]][0]['@value'] }

  {
    '@context' => ROF::RdfContext.dup,
    # Metadata derived from the project ttl file.
    'dc:created' => Time.iso8601(first_value.call('dc:created')).to_date.iso8601 + 'Z',
    'dc:title' => first_value.call('dc:title'),
    'dc:description' => first_value.call('dc:description'),
    'dc:subject' => map_subject,
    # Metadata derived from osf_projects data, passed from the UI.
    'dc:source' => 'https://osf.io/' + source_slug,
    'dc:creator#adminstrative_unit' => project['administrative_unit'],
    'dc:creator#affiliation' => project['affiliation'],
    'nd:osfProjectIdentifier' => osf_project_identifier,
    'dc:creator' => map_creator,
    'dc:type' => project['package_type']
  }
end
Maps RELS-EXT
# File lib/rof/translators/osf_to_rof.rb, line 96
# Builds the RELS-EXT hash: a copy of ROF::RelsExtRefContext under '@context',
# then passed through apply_previous_archived_version_if_applicable, which may
# add to it in place.
#
# @return [Hash] the RELS-EXT hash created here
def map_rels_ext
  rels_ext = { '@context' => ROF::RelsExtRefContext.dup }
  rels_ext.tap { |hash| apply_previous_archived_version_if_applicable(hash) }
end
figures out the rights
# File lib/rof/translators/osf_to_rof.rb, line 161
# Derives access rights from the isPublic value on the first ttl_data element.
#
# @return [Hash] { 'read-groups' => ['public'] } when the value is the string
#   'true', otherwise an empty Hash
def map_rights
  publicly_visible = ttl_data[0][@osf_map['isPublic']][0]['@value'] == 'true'
  publicly_visible ? { 'read-groups' => ['public'] } : {}
end
sets subject
# File lib/rof/translators/osf_to_rof.rb, line 147
# Reads the subject from the first ttl_data element.
#
# @return the '@value' of the first dc:subject entry, or '' when the element
#   has no dc:subject key
def map_subject
  work = ttl_data[0]
  subject_term = @osf_map['dc:subject']
  return '' unless work.key?(subject_term)

  work[subject_term][0]['@value']
end
read user ttl file, extract User's full name
# File lib/rof/translators/osf_to_rof.rb, line 186
# Loads the user's ttl file (looked up by the basename of the given path) and
# pulls the full name from its first element.
#
# @param file_subpath [String] path whose basename names the user's ttl file
# @return the '@value' of the first hasFullName entry
def map_user_from_ttl(file_subpath)
  user_file = File.basename(file_subpath)
  user_node = ttl_from_targz(user_file)[0]
  user_node[@osf_map['hasFullName']][0]['@value']
end
make osf url from bagfile name
# File lib/rof/translators/osf_to_rof.rb, line 155
# Extracts the OSF identifier from a ttl file reference: the last
# '/'-separated segment with its final '.extension' removed.
#
# @param ttl_file [String] path or URL-like reference, e.g. 'a/b/abcde.ttl'
# @return [String] the segment without its extension; the whole segment when
#   it contains no '.'
def osf_url_from_filename(ttl_file)
  basename = ttl_file.rpartition('/').last
  stem, dot, _extension = basename.rpartition('.')
  # rpartition puts the entire string in the LAST slot when the separator is
  # absent, so an extension-less name would otherwise collapse to ''.
  dot.empty? ? basename : stem
end
Extracts the given ttl file from the JHU tar.gz package; the file is assumed to live under data/obj/root.
@return [Array<Hash>] the first element is the “work” and the additional elements, if any, are the contributor(s)
# File lib/rof/translators/osf_to_rof.rb, line 88
# Pulls one ttl file out of "<package_dir>/<source_slug>.tar.gz" and parses it.
#
# The member is looked up at "<source_slug>/data/obj/root/<ttl_filename>"
# through ROF::Utility.file_from_targz, and the file at that same relative
# path under package_dir is then handed to fetch_from_ttl.
#
# @param ttl_filename [String] name of the ttl file inside data/obj/root
# @return the value produced by fetch_from_ttl
# @raise [KeyError] when config is a Hash without 'package_dir' (Hash#fetch)
def ttl_from_targz(ttl_filename)
  base_dir = config.fetch('package_dir')
  member_path = File.join(source_slug, 'data/obj/root', ttl_filename)
  tarball = File.join(base_dir, source_slug + '.tar.gz')
  ROF::Utility.file_from_targz(tarball, member_path)

  extracted_file = File.join(base_dir, member_path)
  fetch_from_ttl(extracted_file)
end