# File lib/newspaper_works/data/work_derivatives.rb, line 227
# Resolve the path of the primary file for these derivatives.
# With a fileset, this is its import_url converted to a path; without
# one, it is the first path assigned on the parent's parent.
# @return [String, NilClass] path, or nil if it cannot be determined
def primary_file_path
  unless fileset.nil?
    return if fileset.import_url.nil?
    return file_url_to_path(fileset.import_url)
  end
  # nil fileset: we look for *intent*, in the form of the first
  # assigned file path for a single-file work.
  work_file = parent
  return if work_file.nil?
  work_files = work_file.parent
  return if work_files.nil?
  work_files.assigned[0]
end
class NewspaperWorks::Data::WorkDerivatives
TODO: consider compositional refactoring (not mixins), but this
may make readability/comprehensibility higher, and yield higher applied/practical complexity.
Attributes
Assigned attachment queue (of paths) @return [Array<String>] list of paths queued for attachment
FileSet is secondary adapted context @return [FileSet] fileset for work, with regard to these derivatives
Assigned deletion queue (of destination names) @return [Array<String>] list of destination names queued for deletion
Work is primary adapted context @return [ActiveFedora::Base] Hyrax work-type object
Public Class Methods
Adapt work and either specific or first fileset
# File lib/newspaper_works/data/work_derivatives.rb, line 46
# Adapt a work and either a specific fileset or the first fileset.
# @param work [ActiveFedora::Base] adapted context
# @param fileset [FileSet, NilClass] fileset to adapt; when nil, the
#   result of first_fileset is used instead
# @param parent [Object, NilClass] NewspaperWorks::Data::WorkFile
#   object for derivatives
def initialize(work, fileset = nil, parent = nil)
  # adapted context usually work, may be string id of FileSet
  @work = work
  @fileset = if fileset.nil?
               first_fileset
             else
               fileset
             end
  # parent is NewspaperWorks::Data::WorkFile object for derivatives
  @parent = parent
  # queues: paths assigned for attachment, names un-assigned for deletion
  @assigned = []
  @unassigned = []
  # computed name-to-path mapping, initially nil as sentinel for JIT load
  @paths = nil
end
alternate constructor spelling:
# File lib/newspaper_works/data/work_derivatives.rb, line 41
# Alternate constructor spelling: forwards its arguments, unchanged
# and in the same order, to new.
# @param work [ActiveFedora::Base] adapted context
# @param fileset [FileSet, NilClass] optional fileset
# @param parent [Object, NilClass] optional parent object
# @return [Object] whatever new returns
def self.of(work, fileset = nil, parent = nil)
  new(work, fileset, parent)
end
Public Instance Methods
Assign a path to assigned queue for attachment @param path [String] Path to source file
# File lib/newspaper_works/data/work_derivatives.rb, line 71
# Queue a source path for attachment.
# @param path [String] Path to source file
def assign(path)
  normalized = normalize_path(path)
  validate_path(normalized)
  @assigned << normalized
  # The assignment is kept in the ephemeral, transient @assigned queue
  # and also mirrored to the db to share context with other components:
  destination = path_destination_name(normalized)
  log_assignment(normalized, destination)
end
attach a single derivative file to work @param file [String, IO] path to file or IO object @param name [String] destination name, usually file extension
# File lib/newspaper_works/data/work_derivatives.rb, line 132
# Attach a single derivative file to the work's fileset.
# @param file [String, IO] path to file, or an IO-like object that
#   supports tell, seek and read
# @param name [String] destination name, usually file extension
# @raise [RuntimeError] if there is no fileset to attach to
def attach(file, name)
  raise 'Cannot save for nil fileset' if fileset.nil?
  mkdir_pairtree
  path = path_factory.derivative_path_for_reference(fileset, name)
  if file.is_a?(String)
    # a path (including String subclasses): copy the file
    FileUtils.copy(file, path)
  else
    # otherwise, presume file is an IO; stream it from its start
    # into the destination, in binary mode so bytes are copied as-is.
    # note: does not close input file/IO, presume that is caller's
    # responsibility; the caller's read position is restored.
    orig_pos = file.tell
    file.seek(0)
    File.open(path, 'wb') { |dstfile| IO.copy_stream(file, dstfile) }
    file.seek(orig_pos)
  end
  # finally, reload @paths after mutation
  load_paths
end
commit pending changes to work files
beginning with removals, then with new assignments
# File lib/newspaper_works/data/work_derivatives.rb, line 95
# Commit pending changes: deletions first, then new attachments,
# after which both queues are replaced with fresh empty arrays.
def commit!
  @unassigned.each do |name|
    delete(name)
  end
  @assigned.each { |path| attach(path, path_destination_name(path)) }
  # reset queues after work is complete
  @assigned = []
  @unassigned = []
end
Given a fileset meeting both of the following conditions:
1. a non-nil import_url value; 2. is attached to a work (persisted in Fedora, if not yet in Solr)...
…this method gets the associated queued derivative paths and attaches them all. @param file_set [FileSet] saved file set, attached to work,
with identifier, and a non-nil import_url
# File lib/newspaper_works/data/work_derivatives.rb, line 111
# Attach every queued derivative related to the file set's import
# path, then record the file set id on each attachment record.
# @param file_set [FileSet] saved file set, attached to a work, with
#   an identifier and a non-nil import_url
# @raise [ArgumentError] if import_url is nil, or no member_of entry
#   answers true to work?
def commit_queued!(file_set)
  raise ArgumentError, 'No FileSet import_url' if file_set.import_url.nil?
  import_path = file_url_to_path(file_set.import_url)
  work = file_set.member_of.select(&:work?).first
  raise ArgumentError, 'Work not found for fileset' if work.nil?
  derivatives = WorkDerivatives.of(work, file_set)
  IngestFileRelation.derivatives_for_file(import_path).each do |path|
    next unless File.exist?(path)
    attachment_record = DerivativeAttachment.where(path: path).first
    # a related path without a logged attachment has no known
    # destination name, so it cannot be attached: skip it rather
    # than fail on a nil record.
    next if attachment_record.nil?
    derivatives.attach(path, attachment_record.destination_name)
    # update previously nil fileset id
    attachment_record.fileset_id = file_set.id
    attachment_record.save!
  end
  @fileset ||= file_set
  load_paths
end
Get raw binary or encoded text data of file as a String @param name [String] destination name, usually file extension @return [String] Raw bytes, or if text file, a UTF-8 encoded String
# File lib/newspaper_works/data/work_derivatives.rb, line 217
# Read the whole derivative file for a destination name.
# @param name [String] destination name, usually file extension
# @return [String] file contents as read via with_io; an empty string
#   when with_io does not run the block
def data(name)
  contents = ''
  with_io(name) { |io| contents += io.read }
  contents
end
Delete a derivative file from work, by destination name @param name [String] destination name, usually file extension
# File lib/newspaper_works/data/work_derivatives.rb, line 154
# Delete a derivative file from the work, by destination name.
# @param name [String] destination name, usually file extension
# @param force [Boolean, NilClass] passed through to FileUtils.rm
# @raise [RuntimeError] if there is no fileset
def delete(name, force: nil)
  raise 'Cannot save for nil fileset' if fileset.nil?
  target = path_factory.derivative_path_for_reference(fileset, name)
  # remove the file only when present; the pairtree is left alone
  # even if it becomes empty, as pruning it is excess scope.
  if File.exist?(target)
    FileUtils.rm(target, force: force)
  end
  # finally, reload @paths after mutation
  load_paths
end
Check if derivative file exists for destination name @param name [String] destination name, usually file extension @return [TrueClass, FalseClass] boolean
# File lib/newspaper_works/data/work_derivatives.rb, line 210
# Check whether a derivative is known for the destination name and
# its file is present on disk.
# @param name [String] destination name, usually file extension
# @return [Boolean]
def exist?(name)
  # read @paths directly (loading it on first use) instead of going
  # through the method_missing proxy and building an array of keys
  load_paths if @paths.nil?
  @paths.key?(name) && File.exist?(@paths[name])
end
Load all paths/names into @paths; invoked lazily upon first access, and again to refresh after files are attached or deleted
# File lib/newspaper_works/data/work_derivatives.rb, line 165
# (Re)build @paths as a mapping of destination name to derivative path.
# Without a fileset id, @paths becomes an empty hash.
# @return [Hash, NilClass] the new mapping, or nil when there is no
#   fileset id
def load_paths
  fsid = fileset_id
  if fsid.nil?
    @paths = {}
    return
  end
  # every derivative path for the fileset, keyed by the name
  # derived from that path
  derivative_paths = path_factory.derivatives_for_reference(fsid)
  @paths = derivative_paths.each_with_object({}) do |derivative, mapping|
    mapping[path_destination_name(derivative)] = derivative
  end
end
path to existing derivative file for destination name @param name [String] destination name, usually file extension @return [String, NilClass] path (or nil)
# File lib/newspaper_works/data/work_derivatives.rb, line 180
# Path to the existing derivative file for a destination name.
# @param name [String] destination name, usually file extension
# @return [String, NilClass] path, or nil when the name is unknown or
#   the file is not on disk
def path(name)
  load_paths if @paths.nil?
  candidate = @paths[name]
  return nil if candidate.nil?
  return nil unless File.exist?(candidate)
  candidate
end
Get number of derivatives or, if a destination name argument
is provided, the size of derivative file
@param name [String] optional destination name, usually file extension @return [Integer] size in bytes
# File lib/newspaper_works/data/work_derivatives.rb, line 201
# Number of derivatives or, when a destination name is given, the
# size of that derivative file.
# @param name [String] optional destination name, usually file extension
# @return [Integer] count of derivatives, or file size in bytes
def size(name = nil)
  load_paths if @paths.nil?
  if name.nil?
    @paths.size
  else
    File.size(@paths[name])
  end
end
Assignment state @return [String] A label describing the state of assignment queues
# File lib/newspaper_works/data/work_derivatives.rb, line 62
# Label for the assignment state, computed after reloading paths.
# @return [String] 'dirty' when either queue has entries, 'empty' when
#   no derivative paths are loaded, otherwise 'saved'
def state
  load_paths
  pending = !(@unassigned.empty? && @assigned.empty?)
  return 'dirty' if pending
  @paths.keys.empty? ? 'empty' : 'saved'
end
Assign a destination name to unassigned queue for deletion – OR –
remove a path from queue of assigned items
@param name [String] Destination name (file extension), or source path
# File lib/newspaper_works/data/work_derivatives.rb, line 83
# Drop a queued source path from the attachment queue and/or queue an
# existing destination name for deletion.
# @param name [String] Destination name (file extension), or source path
def unassign(name)
  # a queued path is removed from @assigned and its logged
  # assignment is removed as well:
  queued = @assigned.include?(name)
  if queued
    @assigned.delete(name)
    unlog_assignment(name, path_destination_name(name))
  end
  # a known destination name with a file is queued for removal
  return unless exist?(name)
  @unassigned.push(name)
end
Run a block in context of the opened derivative file for reading @param name [String] destination name, usually file extension @param block [Proc] block/proc to run in context of file IO
# File lib/newspaper_works/data/work_derivatives.rb, line 190
# Run a block with the derivative file for a name opened for reading.
# Names xml, txt and html are opened as 'rb:UTF-8', all others as 'rb'.
# @param name [String] destination name, usually file extension
# @param block [Proc] block/proc to run in context of file IO
# @return [Object, NilClass] nil when no file path exists for the
#   name, otherwise the result of File.open with the block
def with_io(name, &block)
  filepath = path(name)
  return if filepath.nil?
  textual = %w[xml txt html].include?(name)
  File.open(filepath, textual ? 'rb:UTF-8' : 'rb', &block)
end
Private Instance Methods
# File lib/newspaper_works/data/work_derivatives.rb, line 241
# Convert a file:// URL to a filesystem path by stripping the scheme.
# Only a leading 'file://' is removed; the same text appearing later
# in the string is left untouched. No percent-decoding is performed.
# @param url [String] file URL (or plain path, returned as-is)
# @return [String] path
def file_url_to_path(url)
  url.sub(%r{\Afile://}, '')
end
# File lib/newspaper_works/data/work_derivatives.rb, line 254
# Record an assignment as a DerivativeAttachment, then log the
# relation between the primary file and this derivative path.
# @param path [String] path to the assigned source file
# @param name [String] destination name for the path
def log_assignment(path, name)
  attachment_model = NewspaperWorks::DerivativeAttachment
  attachment_model.create!(
    fileset_id: fileset_id,
    path: path,
    destination_name: name
  )
  log_primary_file_relation(path)
end
# File lib/newspaper_works/data/work_derivatives.rb, line 245
# Record an IngestFileRelation between the primary file path and a
# derivative path; does nothing when there is no primary file path.
# @param path [String] derivative path
def log_primary_file_relation(path)
  source_path = primary_file_path
  unless source_path.nil?
    NewspaperWorks::IngestFileRelation.create!(
      file_path: source_path,
      derivative_path: path
    )
  end
end
# File lib/newspaper_works/data/work_derivatives.rb, line 289
# Proxy mapping/hash enumeration methods to @paths.
# Anything respond_to_missing? does not accept falls through to super.
def method_missing(method, *args, &block)
  return super unless respond_to_missing?(method)
  # make sure @paths is loaded before proxying to it
  load_paths if @paths.nil?
  @paths.send(method, *args, &block)
end
Make the shared pairtree directory in which derivatives for the fileset live
# File lib/newspaper_works/data/work_derivatives.rb, line 304
# Create the directory that holds the fileset's derivative files.
def mkdir_pairtree
  # Hyrax::DerivativePath has no public method to directly get the
  # bare pairtree path for derivatives for a fileset, but we
  # can infer it as the parent directory of a derivative path.
  path = path_factory.derivative_path_for_reference(fileset, '')
  # mkdir_p is a no-op for an existing directory, so no prior
  # existence check is needed
  FileUtils.mkdir_p(File.dirname(path))
end
# File lib/newspaper_works/data/work_derivatives.rb, line 280
# Destination name for a path: the text after the last dot of the
# file name, passed through the class's remap_names mapping when that
# mapping has an entry for it.
# @param path [String] path to a file
# @return [String, NilClass] destination name
def path_destination_name(path)
  # only the file name is split, so dots in directory names
  # (e.g. "/v1.2/README") are not mistaken for an extension
  ext = File.basename(path).split('.').last
  self.class.remap_names[ext] || ext
end
# File lib/newspaper_works/data/work_derivatives.rb, line 299
# Object used to compute derivative paths; callers in this class
# invoke derivative_path_for_reference and derivatives_for_reference
# on it.
# @return [Class] Hyrax::DerivativePath
def path_factory
  Hyrax::DerivativePath
end
# File lib/newspaper_works/data/work_derivatives.rb, line 285
# Report a method as handled when a Hash would respond to it, since
# such calls are proxied to the @paths hash.
# @param symbol [Symbol] method name
# @param include_priv [Boolean] whether private methods count
# @return [Boolean]
def respond_to_missing?(symbol, include_priv = false)
  Hash.new.respond_to?(symbol, include_priv)
end
# File lib/newspaper_works/data/work_derivatives.rb, line 263
# Destroy the DerivativeAttachment records matching a path and
# destination name, additionally matching on fileset id when one is
# available.
# @param path [String] assigned source path
# @param name [String] destination name for the path
def unlog_assignment(path, name)
  fsid = fileset_id
  criteria = {}
  criteria[:fileset_id] = fsid unless fsid.nil?
  criteria[:path] = path
  criteria[:destination_name] = name
  NewspaperWorks::DerivativeAttachment.where(criteria).destroy_all
  # note: there is deliberately no attempt to "unlog" primary
  # file relation, as leaving it should have no side-effect.
end