class Hydra::Works::CharacterizationService

Attributes

mapping[RW]
object[RW]
parser_class[RW]
source[RW]
tools[RW]

Public Class Methods

new(object, source, options) click to toggle source
# File lib/hydra/works/services/characterization_service.rb, line 16
# Builds a characterization service for +object+.
#
# @param object the target whose properties will receive characterization values
# @param source the file or path to characterize; stored exactly as given (may be nil)
# @param [Hash] options overrides, looked up under these keys:
#   :parser_mapping (default: Hydra::Works::Characterization.mapper),
#   :parser_class   (default: Hydra::Works::Characterization::FitsDocument),
#   :ch12n_tool     (default: :fits)
def initialize(object, source, options)
  @object       = object
  @source       = source
  # Block-form fetch: a default is only evaluated when its key is absent,
  # so supplying an override never touches the library-level defaults.
  @mapping      = options.fetch(:parser_mapping) { Hydra::Works::Characterization.mapper }
  @parser_class = options.fetch(:parser_class) { Hydra::Works::Characterization::FitsDocument }
  @tools        = options.fetch(:ch12n_tool) { :fits }
end
run(object, source = nil, options = {}) click to toggle source

@param [Hydra::PCDM::File] object which has properties to receive characterization values. @param [String, File] source for characterization to be run on. File object or path on disk.

If none is provided, it will assume the binary content already present on the object.

@param [Hash] options to be passed to characterization. parser_mapping:, parser_class:, ch12n_tool:

# File lib/hydra/works/services/characterization_service.rb, line 10
# Convenience entry point: builds a service for the given arguments and
# immediately characterizes.
#
# @param object the target that receives the characterization values
# @param source optional file or path to characterize (defaults to nil)
# @param [Hash] options passed through to the constructor unchanged
# @return whatever #characterize returns
def self.run(object, source = nil, options = {})
  service = new(object, source, options)
  service.characterize
end

Public Instance Methods

characterize() click to toggle source

Get the given source into a form that can be passed to Hydra::FileCharacterization. Use Hydra::FileCharacterization to extract metadata (an OM XML document). Get the terms (and their values) from the extracted metadata. Assign the values of the terms to the properties of the object.

# File lib/hydra/works/services/characterization_service.rb, line 28
# Runs the full pipeline, innermost call first: obtain the content,
# extract raw metadata from it, parse that into terms, then store the
# terms on the object.
#
# @return the result of #store_metadata
def characterize
  store_metadata(
    parse_metadata(
      extract_metadata(source_to_content)
    )
  )
end

Protected Instance Methods

append_property_value(property, value) click to toggle source
# File lib/hydra/works/services/characterization_service.rb, line 105
# Writes +value+ to +property+ on the object.
#
# - :mime_type is overwritten, so the last value seen wins.
# - Every other property has the value appended to what the object already holds.
# - :height and :width are then collapsed to the largest integer value,
#   stored as a String.
#
# @param property [Symbol] name of the property to set
# @param value the single value to record
def append_property_value(property, value)
  unless property == :mime_type
    existing = object.send(property)
    value = existing + [value]
  end

  if [:height, :width].include?(property)
    largest = value.map(&:to_i).max
    value = largest.to_s
  end

  object.send("#{property}=", value)
end
characterization_terms(omdoc) click to toggle source

Get proxy terms and values from the parser

# File lib/hydra/works/services/characterization_service.rb, line 71
# Collects the value of every proxy term in the parser's terminology.
#
# A term is treated as a proxy when its target responds to +proxied_term+.
# Terms whose reader raises NoMethodError are skipped, and terms whose value
# is empty are dropped from the result.
#
# @param omdoc the parsed metadata document
# @return [Hash] term name => extracted value
def characterization_terms(omdoc)
  extracted = {}
  omdoc.class.terminology.terms.each_pair do |name, target|
    if target.respond_to?(:proxied_term)
      begin
        extracted[name] = omdoc.send(name)
      rescue NoMethodError
        # reader is not available on this document; leave the term out
      end
    end
  end
  extracted.reject { |_name, value| value.empty? }
end
extract_metadata(content) click to toggle source
# File lib/hydra/works/services/characterization_service.rb, line 46
# Hands the content to Hydra::FileCharacterization together with the file
# name and the configured tools, setting the :fits entry of the yielded
# configuration to Hydra::Derivatives.fits_path.
#
# @param content the data (or File) to characterize
# @return whatever Hydra::FileCharacterization.characterize returns
def extract_metadata(content)
  name = file_name
  Hydra::FileCharacterization.characterize(content, name, tools) do |tool_paths|
    tool_paths[:fits] = Hydra::Derivatives.fits_path
  end
end
file_name() click to toggle source

Determine the filename to send to Hydra::FileCharacterization. If no source is present, use the name of the file from the object; otherwise, use the supplied source.

# File lib/hydra/works/services/characterization_service.rb, line 54
# Picks the file name to report for characterization.
#
# With no source, the object's original_name is used, falling back to
# "original_file" when that is nil. With a source, the base name of the
# File's path (or of the source itself, when it is not a File) is used.
#
# @return [String]
def file_name
  unless source
    recorded = object.original_name
    return recorded.nil? ? "original_file" : recorded
  end

  location = source.is_a?(File) ? source.path : source
  File.basename(location)
end
parse_metadata(metadata) click to toggle source

Use OM to parse metadata

# File lib/hydra/works/services/characterization_service.rb, line 63
# Loads the extracted metadata into a new parser instance and returns its
# characterization terms.
#
# The XML is only assigned when +metadata+ is present, and the parser's
# __cleanup__ hook is invoked only if the parser defines one.
#
# @param metadata the raw XML produced by characterization
# @return the result of #characterization_terms for the populated parser
def parse_metadata(metadata)
  document = parser_class.new
  if metadata.present?
    document.ng_xml = Nokogiri::XML(metadata)
  end
  if document.respond_to?(:__cleanup__)
    document.__cleanup__
  end
  characterization_terms(document)
end
property_for(term) click to toggle source

Check the mapping, then the object itself, for a matching term. Return the property symbol, or nil if neither matches.

# File lib/hydra/works/services/characterization_service.rb, line 97
# Resolves a metadata term to the object property that should hold it.
#
# The mapping is consulted first: a mapped name is used only if the object
# responds to it. Otherwise the term itself is used when the object responds
# to it.
#
# @param term the term name coming from the parser
# @return the property name, or nil when nothing matches
def property_for(term)
  return mapping[term] if mapping.key?(term) && object.respond_to?(mapping[term])
  return term if object.respond_to?(term)

  nil
end
source_to_content() click to toggle source

@return content of object if source is nil; an open File if source is a String path; otherwise, the data read from source after rewinding it

# File lib/hydra/works/services/characterization_service.rb, line 38
# Turns the configured source into something that can be characterized.
#
# - nil source: the object's own content.
# - String source: treated as a path and returned as an opened File, so the
#   file is not read into memory here. The handle is returned open; nothing
#   in this method closes it.
# - anything else: rewound and read in full.
def source_to_content
  case source
  when nil
    object.content
  when String
    File.open(source)
  else
    source.rewind
    source.read
  end
end
store_metadata(terms) click to toggle source

Assign values of the instance properties from the metadata mapping :prop => val

# File lib/hydra/works/services/characterization_service.rb, line 86
# Copies each extracted term onto the object.
#
# Terms with no matching property (see #property_for) are ignored. Values
# are wrapped with Array() so single values and lists are handled alike,
# and each element is recorded individually.
#
# @param terms [Hash] term name => value(s)
# @return the +terms+ collection that was passed in
def store_metadata(terms)
  terms.each_pair do |name, extracted|
    target = property_for(name)
    unless target.nil?
      Array(extracted).each do |item|
        append_property_value(target, item)
      end
    end
  end
end