class NewspaperWorks::Data::WorkDerivatives

TODO: consider compositional refactoring (not mixins); this may improve
readability/comprehensibility, but could also yield higher applied/practical
complexity.

Attributes

remap_names[RW]
assigned[RW]

Assigned attachment queue (of paths) @return [Array<String>] list of paths queued for attachment

fileset[RW]

FileSet is secondary adapted context @return [FileSet] fileset for work, with regard to these derivatives

parent[RW]

Parent pointer to WorkFile object representing fileset @return [NewspaperWorks::Data::WorkFile] WorkFile for fileset, work pair

unassigned[RW]

Unassigned queue (of destination names) for deletion @return [Array<String>] list of destination names queued for deletion

work[RW]

Work is primary adapted context @return [ActiveFedora::Base] Hyrax work-type object

Public Class Methods

new(work, fileset = nil, parent = nil) click to toggle source

Adapt work and either specific or first fileset

# File lib/newspaper_works/data/work_derivatives.rb, line 46
# Adapt a work together with either a specific fileset or, when none is
# given, whatever `first_fileset` returns.
#
# @param work [ActiveFedora::Base] primary adapted context (usually a work)
# @param fileset [FileSet, NilClass] fileset to adapt; nil selects the
#   result of `first_fileset`
# @param parent [NewspaperWorks::Data::WorkFile, NilClass] WorkFile object
#   these derivatives belong to
def initialize(work, fileset = nil, parent = nil)
  # primary context must be in place before first_fileset is consulted
  @work = work
  @fileset =
    if fileset.nil?
      first_fileset
    else
      fileset
    end
  # back-pointer to the NewspaperWorks::Data::WorkFile for these derivatives
  @parent = parent
  # transient queues: source paths to attach, destination names to delete
  @assigned = []
  @unassigned = []
  # name-to-path mapping; nil is the sentinel meaning "not loaded yet"
  @paths = nil
end
of(work, fileset = nil, parent = nil) click to toggle source

alternate constructor spelling:

# File lib/newspaper_works/data/work_derivatives.rb, line 41
# Alternate constructor spelling; forwards all arguments unchanged to `new`.
#
# @param work [ActiveFedora::Base] work to adapt
# @param fileset [FileSet, NilClass] optional fileset to adapt
# @param parent [NewspaperWorks::Data::WorkFile, NilClass] optional parent
# @return [WorkDerivatives] the newly constructed adapter
def self.of(work, fileset = nil, parent = nil)
  new(work, fileset, parent)
end

Public Instance Methods

assign(path) click to toggle source

Assign a path to assigned queue for attachment @param path [String] Path to source file

# File lib/newspaper_works/data/work_derivatives.rb, line 71
# Queue a source file path for attachment.
#
# The path is normalized and validated first, then pushed onto the
# transient @assigned queue and mirrored to the database via
# `log_assignment`, so other components can share the context.
#
# @param path [String] Path to source file
# @return [Object] whatever `log_assignment` returns
def assign(path)
  normalized = normalize_path(path)
  validate_path(normalized)
  @assigned << normalized
  destination = path_destination_name(normalized)
  log_assignment(normalized, destination)
end
attach(file, name) click to toggle source

attach a single derivative file to work @param file [String, IO] path to file or IO object @param name [String] destination name, usually file extension

# File lib/newspaper_works/data/work_derivatives.rb, line 132
# Attach a single derivative file to the fileset, copying it to the
# derivative path computed by `path_factory` for the destination name.
#
# @param file [String, Pathname, IO] path to a source file, or a readable
#   and seekable IO-like object (e.g. File, Tempfile, StringIO)
# @param name [String] destination name, usually file extension
# @return [Object] result of `load_paths`
# @raise [RuntimeError] when there is no fileset to attach to
def attach(file, name)
  raise 'Cannot save for nil fileset' if fileset.nil?
  mkdir_pairtree
  path = path_factory.derivative_path_for_reference(fileset, name)
  if file.respond_to?(:read) && file.respond_to?(:seek)
    # IO-like source: stream it from the beginning into the destination.
    #   The input is not closed (caller's responsibility), but its
    #   position is restored even when the copy fails.
    orig_pos = file.tell
    begin
      file.seek(0)
      # binary mode: derivatives may be images/PDF, so never transcode
      File.open(path, 'wb') { |dstfile| IO.copy_stream(file, dstfile) }
    ensure
      file.seek(orig_pos)
    end
  else
    # anything else is treated as a filesystem path (String, String
    #   subclass, or Pathname) and copied as-is
    FileUtils.copy(file, path)
  end
  # finally, reload @paths after mutation
  load_paths
end
commit!() click to toggle source

commit pending changes to work files

beginning with removals, then with new assignments
# File lib/newspaper_works/data/work_derivatives.rb, line 95
# Commit pending changes to the work's derivative files: queued deletions
# are processed first, then queued attachments; both queues are emptied
# once all work has completed.
#
# @return [Array] the (new, empty) unassigned queue
def commit!
  # removals first...
  @unassigned.each do |name|
    delete(name)
  end
  # ...then new attachments, named by destination name of each source path
  @assigned.each { |source| attach(source, path_destination_name(source)) }
  # start over with fresh queues
  @assigned = []
  @unassigned = []
end
commit_queued!(file_set) click to toggle source

Given a fileset meeting both of the following conditions:

1. a non-nil import_url value;
2. is attached to a work (persisted in Fedora, if not yet in Solr)...

…this method gets the queued derivative paths associated with the file set's import file, and attaches them all. @param file_set [FileSet] saved file set, attached to work,

with identifier, and a non-nil import_url
# File lib/newspaper_works/data/work_derivatives.rb, line 111
# Attach all derivatives previously queued for the primary file that a
# saved file set was imported from.
#
# Derivative paths are looked up via IngestFileRelation using the path
# taken from the file set's import_url. Each path that exists on disk and
# has a DerivativeAttachment record is attached (through a separate
# WorkDerivatives adapter for the file set's work), and its record is
# updated with the file set id. Paths missing on disk, or without an
# attachment record (so no destination name is known), are skipped.
#
# @param file_set [FileSet] saved file set, attached to a work, with a
#   non-nil import_url
# @return [Object] result of `load_paths`
# @raise [ArgumentError] if import_url is nil, or no work is found
def commit_queued!(file_set)
  raise ArgumentError, 'No FileSet import_url' if file_set.import_url.nil?
  import_path = file_url_to_path(file_set.import_url)
  work = file_set.member_of.select(&:work?)[0]
  raise ArgumentError, 'Work not found for fileset' if work.nil?
  derivatives = WorkDerivatives.of(work, file_set)
  IngestFileRelation.derivatives_for_file(import_path).each do |path|
    next unless File.exist?(path)
    attachment_record = DerivativeAttachment.where(path: path).first
    # a relation may outlive its attachment record; without the record
    #   there is no destination name to attach under, so skip the path
    #   instead of failing on nil
    next if attachment_record.nil?
    derivatives.attach(path, attachment_record.destination_name)
    # update previously nil fileset id
    attachment_record.fileset_id = file_set.id
    attachment_record.save!
  end
  # adopt the file set only when this adapter does not have one yet
  @fileset ||= file_set
  load_paths
end
data(name) click to toggle source

Get raw binary or encoded text data of file as a String @param name [String] destination name, usually file extension @return [String] Raw bytes, or if text file, a UTF-8 encoded String

# File lib/newspaper_works/data/work_derivatives.rb, line 217
# Read the complete content of a derivative file into a String.
#
# @param name [String] destination name, usually file extension
# @return [String] content read through `with_io`; remains an empty
#   string when `with_io` never yields (no file for the name)
def data(name)
  content = ''
  # the block only runs when with_io yields an opened file
  with_io(name) { |stream| content += stream.read }
  content
end
delete(name, force: nil) click to toggle source

Delete a derivative file from work, by destination name @param name [String] destination name, usually file extension

# File lib/newspaper_works/data/work_derivatives.rb, line 154
# Delete a derivative file from the fileset, by destination name.
#
# Only the file itself is removed, and only if it exists; the enclosing
# pairtree directories are left alone even when they become empty.
#
# @param name [String] destination name, usually file extension
# @param force [Boolean, NilClass] passed through as FileUtils.rm's
#   `force` option
# @return [Object] result of `load_paths`
# @raise [RuntimeError] when there is no fileset
def delete(name, force: nil)
  raise 'Cannot save for nil fileset' if fileset.nil?
  target = path_factory.derivative_path_for_reference(fileset, name)
  if File.exist?(target)
    FileUtils.rm(target, force: force)
  end
  # the set of derivative files changed, so refresh @paths
  load_paths
end
exist?(name) click to toggle source

Check if derivative file exists for destination name @param name [String] destination name, usually file extension @return [TrueClass, FalseClass] boolean

# File lib/newspaper_works/data/work_derivatives.rb, line 210
# Check whether a derivative is both known by destination name and
# present on disk.
#
# @param name [String] destination name, usually file extension
# @return [TrueClass, FalseClass] boolean
def exist?(name)
  # unknown destination name: nothing to look for on disk
  return false unless keys.include?(name)
  File.exist?(self[name])
end
load_paths() click to toggle source

Load (or reload) all paths/names into @paths; invoked lazily on first access, and again after each attach or delete

# File lib/newspaper_works/data/work_derivatives.rb, line 165
# (Re)build @paths, the mapping of destination name to derivative path,
# from the paths `path_factory` reports for the current fileset id.
#
# @return [Hash, NilClass] the new mapping; nil (with @paths set to an
#   empty Hash) when there is no fileset id
def load_paths
  reference = fileset_id
  if reference.nil?
    # no fileset id means there cannot be any derivatives yet
    @paths = {}
    return
  end
  mapping = {}
  path_factory.derivatives_for_reference(reference).each do |file_path|
    # key each path by its destination name
    mapping[path_destination_name(file_path)] = file_path
  end
  @paths = mapping
end
path(name) click to toggle source

path to existing derivative file for destination name @param name [String] destination name, usually file extension @return [String, NilClass] path (or nil)

# File lib/newspaper_works/data/work_derivatives.rb, line 180
# Path to an existing derivative file for a destination name.
#
# @param name [String] destination name, usually file extension
# @return [String, NilClass] the path, or nil when the name is unknown
#   or the file no longer exists on disk
def path(name)
  # lazily populate the name-to-path mapping
  load_paths if @paths.nil?
  candidate = @paths[name]
  candidate if !candidate.nil? && File.exist?(candidate)
end
size(name = nil) click to toggle source

Get number of derivatives or, if a destination name argument

is provided, the size of derivative file

@param name [String] optional destination name, usually file extension @return [Integer] number of derivatives, or size in bytes of the named derivative file

# File lib/newspaper_works/data/work_derivatives.rb, line 201
# Number of derivatives, or the size of one derivative file.
#
# @param name [String, NilClass] optional destination name, usually file
#   extension
# @return [Integer] count of known derivatives when name is nil,
#   otherwise the size in bytes of the named derivative file
def size(name = nil)
  # lazily populate the name-to-path mapping
  load_paths if @paths.nil?
  name.nil? ? @paths.size : File.size(@paths[name])
end
state() click to toggle source

Assignment state @return [String] A label describing the state of assignment queues

# File lib/newspaper_works/data/work_derivatives.rb, line 62
# Label describing the state of the assignment queues and stored files.
#
# @return [String] 'dirty' when anything is queued for attachment or
#   deletion, 'empty' when nothing is queued and no derivative paths are
#   known, otherwise 'saved'
def state
  # always refresh the mapping before judging
  load_paths
  if !@unassigned.empty? || !@assigned.empty?
    'dirty'
  elsif @paths.keys.empty?
    'empty'
  else
    'saved'
  end
end
unassign(name) click to toggle source

Assign a destination name to unassigned queue for deletion – OR –

remove a path from queue of assigned items

@param name [String] Destination name (file extension), or source path

# File lib/newspaper_works/data/work_derivatives.rb, line 83
# Withdraw a queued attachment and/or queue an existing derivative for
# deletion.
#
# When the argument is a path currently in the @assigned queue, it is
# removed from that queue and its logged assignment is removed as well.
# Independently, when the argument is the destination name of an existing
# derivative, it is pushed onto the @unassigned queue.
#
# @param name [String] Destination name (file extension), or source path
# @return [Array<String>, NilClass] the unassigned queue when the name
#   was queued for deletion, otherwise nil
def unassign(name)
  queued = @assigned.include?(name)
  @assigned.delete(name) if queued
  unlog_assignment(name, path_destination_name(name)) if queued
  # known destination name: schedule the stored derivative for removal
  @unassigned << name if exist?(name)
end
with_io(name, &block) click to toggle source

Run a block in context of the opened derivative file for reading @param name [String] destination name, usually file extension @param block [Proc] block/proc to run in context of file IO

# File lib/newspaper_works/data/work_derivatives.rb, line 190
# Run a block with the derivative file for a destination name opened for
# reading.
#
# The file is opened in binary read mode; for the destination names
# 'xml', 'txt' and 'html' the mode additionally declares UTF-8.
#
# @param name [String] destination name, usually file extension
# @param block [Proc] block/proc to run in context of file IO
# @return [Object, NilClass] result of File.open, or nil when no
#   derivative path exists for the name
def with_io(name, &block)
  filepath = path(name)
  return if filepath.nil?
  mode = %w[xml txt html].include?(name) ? 'rb:UTF-8' : 'rb'
  File.open(filepath, mode, &block)
end

Private Instance Methods

file_url_to_path(url) click to toggle source
# File lib/newspaper_works/data/work_derivatives.rb, line 241
# Convert a file:// URL to a local filesystem path by removing the
# leading scheme prefix. Only a prefix at the very start of the string is
# removed, so a later literal "file://" in the path is left intact.
# Strings without the prefix are returned unchanged (as a copy).
#
# @param url [String] URL, e.g. "file:///data/page.tif"
# @return [String] path portion, e.g. "/data/page.tif"
def file_url_to_path(url)
  url.sub(%r{\Afile://}, '')
end
log_assignment(path, name) click to toggle source
# File lib/newspaper_works/data/work_derivatives.rb, line 254
# Persist an assignment as a DerivativeAttachment record, then log the
# relation between the derivative path and the primary file.
#
# @param path [String] source path of the derivative
# @param name [String] destination name for the derivative
# @return [Object] result of `log_primary_file_relation`
def log_assignment(path, name)
  record = { fileset_id: fileset_id, path: path, destination_name: name }
  NewspaperWorks::DerivativeAttachment.create!(**record)
  log_primary_file_relation(path)
end
log_primary_file_relation(path) click to toggle source
# File lib/newspaper_works/data/work_derivatives.rb, line 245
# Record, as an IngestFileRelation, that a derivative path belongs to the
# primary file. Nothing is recorded when no primary file path is known.
#
# @param path [String] source path of the derivative
# @return [Object, NilClass] the created record, or nil when skipped
def log_primary_file_relation(path)
  source = primary_file_path
  # without a primary file there is nothing to relate the derivative to
  return if source.nil?
  relation = { file_path: source, derivative_path: path }
  NewspaperWorks::IngestFileRelation.create!(**relation)
end
method_missing(method, *args, &block) click to toggle source
Calls superclass method
# File lib/newspaper_works/data/work_derivatives.rb, line 289
# Proxy mapping/hash enumeration methods to the @paths mapping.
#
# Methods accepted by `respond_to_missing?` are sent to @paths, which is
# loaded first if it has not been loaded yet; everything else falls
# through to the superclass implementation.
#
# @param method [Symbol] name of the missing method
# @param args [Array] arguments for the call
# @param block [Proc] optional block for the call
# @return [Object] result of the proxied call
def method_missing(method, *args, &block)
  return super unless respond_to_missing?(method)
  # make sure the mapping exists before delegating to it
  load_paths if @paths.nil?
  @paths.send(method, *args, &block)
end
mkdir_pairtree() click to toggle source

make shared (pairtree) directory for the fileset's derivatives to live in

# File lib/newspaper_works/data/work_derivatives.rb, line 304
# Ensure the directory that holds the fileset's derivative files exists.
#
# The path factory is only asked for full derivative file paths here, so
# the directory is inferred as the parent directory of the derivative
# path computed for an empty destination name.
#
# @return [Object] result of FileUtils.mkdir_p
def mkdir_pairtree
  path = path_factory.derivative_path_for_reference(fileset, '')
  # mkdir_p creates missing parents and is a no-op for an existing
  #   directory, so no separate existence check is needed
  FileUtils.mkdir_p(File.dirname(path))
end
path_destination_name(path) click to toggle source
# File lib/newspaper_works/data/work_derivatives.rb, line 280
# Destination name for a path: the text after the last '.' in the path,
# replaced by its entry in the class-level `remap_names` mapping when one
# exists.
#
# @param path [String] path to a source or derivative file
# @return [String] destination name
def path_destination_name(path)
  suffix = path.split('.').last
  remapped = self.class.remap_names[suffix]
  remapped || suffix
end
path_factory() click to toggle source
# File lib/newspaper_works/data/work_derivatives.rb, line 299
# Object used to compute derivative paths; the other methods here call
# `derivative_path_for_reference` and `derivatives_for_reference` on it.
#
# @return [Class] Hyrax::DerivativePath
def path_factory
  Hyrax::DerivativePath
end
primary_file_path() click to toggle source
# File lib/newspaper_works/data/work_derivatives.rb, line 227
# Path of the primary file these derivatives relate to.
#
# With a fileset, the path is derived from the fileset's import_url (nil
# when there is no import_url). Without a fileset, *intent* is looked for
# instead: the first assigned path of the parent's parent (for a
# single-file work), when both of those objects are available.
#
# @return [String, NilClass] primary file path, or nil when unknown
def primary_file_path
  unless fileset.nil?
    import_url = fileset.import_url
    return import_url.nil? ? nil : file_url_to_path(import_url)
  end
  # no fileset yet: walk up from the WorkFile to the object holding the
  #   queue of assigned primary files
  work_files = parent.nil? ? nil : parent.parent
  return if work_files.nil?
  work_files.assigned[0]
end
respond_to_missing?(symbol, include_priv = false) click to toggle source
# File lib/newspaper_works/data/work_derivatives.rb, line 285
# Report a missing method as supported exactly when a Hash would respond
# to it; `method_missing` uses this to decide what to proxy to @paths.
#
# @param symbol [Symbol] method name being queried
# @param include_priv [Boolean] passed through to Hash#respond_to? to
#   include private methods in the check
# @return [Boolean] whether an (empty) Hash responds to the method
def respond_to_missing?(symbol, include_priv = false)
  {}.respond_to?(symbol, include_priv)
end
unlog_assignment(path, name) click to toggle source
# File lib/newspaper_works/data/work_derivatives.rb, line 263
# Remove logged DerivativeAttachment records for an assignment.
#
# Records are matched on path and destination name, and additionally on
# fileset id when one is available. There is deliberately no attempt to
# "unlog" the primary file relation, as leaving it should have no
# side-effect.
#
# @param path [String] source path of the derivative
# @param name [String] destination name for the derivative
# @return [Object] result of `destroy_all`
def unlog_assignment(path, name)
  criteria = { path: path, destination_name: name }
  fsid = fileset_id
  # narrow the match to this fileset only when its id is known
  criteria = { fileset_id: fsid }.merge(criteria) unless fsid.nil?
  NewspaperWorks::DerivativeAttachment.where(**criteria).destroy_all
end