class Hydra::Works::CharacterizationService
Attributes
Public Class Methods
# File lib/hydra/works/services/characterization_service.rb, line 16
# Stores the target object, the source and the characterization settings.
#
# @param object the object whose properties will be assigned
# @param source file or path to characterize; may be nil
# @param [Hash] options optional overrides, read from the keys
#   :parser_mapping, :parser_class and :ch12n_tool (defaults to :fits)
def initialize(object, source, options)
  # Defaults are resolved up front, then handed to fetch as fallbacks.
  default_mapping = Hydra::Works::Characterization.mapper
  default_parser = Hydra::Works::Characterization::FitsDocument

  @object = object
  @source = source
  @mapping = options.fetch(:parser_mapping, default_mapping)
  @parser_class = options.fetch(:parser_class, default_parser)
  @tools = options.fetch(:ch12n_tool, :fits)
end
@param [Hydra::PCDM::File] object which has properties to receive characterization values. @param [String, File] source for characterization to be run on. File object or path on disk.
If none is provided, it will assume the binary content already present on the object.
@param [Hash] options to be passed to characterization. Recognized keys: parser_mapping:, parser_class:, ch12n_tool: (the characterization tool to use; defaults to :fits).
# File lib/hydra/works/services/characterization_service.rb, line 10
# Class-level shortcut: instantiate the service with the given arguments and
# characterize straight away.
#
# @param object forwarded to the constructor
# @param source forwarded to the constructor (nil when omitted)
# @param [Hash] options forwarded to the constructor (empty when omitted)
# @return the result of #characterize
def self.run(object, source = nil, options = {})
  service = new(object, source, options)
  service.characterize
end
Public Instance Methods
Characterization runs in four steps: (1) get the given source into a form that can be passed to Hydra::FileCharacterization; (2) use Hydra::FileCharacterization to extract metadata (an OM XML document); (3) get the terms (and their values) from the extracted metadata; (4) assign the values of the terms to the properties of the object.
# File lib/hydra/works/services/characterization_service.rb, line 28
# Runs the whole pipeline: obtain content, extract metadata from it, parse
# the metadata into terms, and store those terms on the object.
#
# @return the result of store_metadata
def characterize
  content = source_to_content
  extracted_md = extract_metadata(content)
  terms = parse_metadata(extracted_md)
  store_metadata(terms)
ensure
  # When source is a path String, source_to_content opens a File handle that
  # nothing else closes. Release it here, on success and on error alike.
  # Content obtained any other way is left untouched.
  # NOTE(review): assumes extract_metadata is done with the handle once it
  # returns — confirm against Hydra::FileCharacterization.
  content.close if source.is_a?(String) && content.respond_to?(:close)
end
Protected Instance Methods
# File lib/hydra/works/services/characterization_service.rb, line 105
# Writes one value to the given property of the object.
#
# @param property name of the property to assign
# @param value the single value to add
def append_property_value(property, value)
  # mime_type must not pile up: the incoming value simply replaces the old
  # one. Any other property gets the value appended to what is already there.
  combined = property == :mime_type ? value : object.send(property) + [value]
  # Several heights / widths make no sense; keep only the largest, as a string.
  combined = combined.map(&:to_i).max.to_s if %i[height width].include?(property)
  object.send("#{property}=", combined)
end
Get proxy terms and values from the parser
# File lib/hydra/works/services/characterization_service.rb, line 71
# Collects the proxy terms of the parsed document together with their values.
#
# @param omdoc the parsed document; its class must expose terminology.terms
# @return [Hash] term name => value, with empty values removed
def characterization_terms(omdoc)
  values = {}
  omdoc.class.terminology.terms.each_pair do |name, target|
    # Only proxies are of interest: a term is a proxy when its target
    # answers to proxied_term.
    if target.respond_to?(:proxied_term)
      begin
        values[name] = omdoc.send(name)
      rescue NoMethodError
        # Best effort: a term the document cannot resolve is left out.
      end
    end
  end
  values.reject { |_name, value| value.empty? }
end
# File lib/hydra/works/services/characterization_service.rb, line 46
# Hands the content, the file name and the configured tools to
# Hydra::FileCharacterization, pointing its :fits setting at the path
# reported by Hydra::Derivatives.
#
# @param content what source_to_content produced
# @return whatever Hydra::FileCharacterization.characterize returns
def extract_metadata(content)
  name = file_name
  selected_tools = tools
  Hydra::FileCharacterization.characterize(content, name, selected_tools) do |config|
    config[:fits] = Hydra::Derivatives.fits_path
  end
end
Determine the filename to send to Hydra::FileCharacterization. If no source is present, use the name of the file from the object; otherwise, use the supplied source.
# File lib/hydra/works/services/characterization_service.rb, line 54
# Works out the file name to report alongside the content.
#
# @return [String] the base name of the source when one was supplied;
#   otherwise the object's original_name, or "original_file" when that is nil
def file_name
  unless source
    recorded = object.original_name
    return recorded.nil? ? "original_file" : recorded
  end

  # A File contributes its path; anything else is treated as the path itself.
  location = source.is_a?(File) ? source.path : source
  File.basename(location)
end
Use OM to parse metadata
# File lib/hydra/works/services/characterization_service.rb, line 63
# Feeds the extracted metadata to a fresh parser instance and returns the
# terms found in it.
#
# @param metadata the extracted metadata; skipped when not present
# @return the result of characterization_terms for the parsed document
def parse_metadata(metadata)
  document = parser_class.new
  if metadata.present?
    document.ng_xml = Nokogiri::XML(metadata)
  end
  # Parsers that define __cleanup__ get a chance to tidy up before reading.
  document.__cleanup__ if document.respond_to?(:__cleanup__)
  characterization_terms(document)
end
Check the parser mapping first, then the object itself, for a property matching the term. Returns the property symbol, or nil when neither matches.
# File lib/hydra/works/services/characterization_service.rb, line 97
# Resolves a term to the property it should be stored under.
#
# @param term the term name to look up
# @return the mapped property when the mapping has the term and the object
#   responds to it; the term itself when the object responds to that; nil
#   otherwise
def property_for(term)
  return mapping[term] if mapping.key?(term) && object.respond_to?(mapping[term])
  return term if object.respond_to?(term)

  nil
end
@return content of object if source is nil; otherwise, an open File when source is a path String, or the data read from the source after rewinding it
# File lib/hydra/works/services/characterization_service.rb, line 38
# Turns the configured source into something that can be characterized.
#
# @return the object's content when there is no source; an open File when
#   the source is a String; otherwise the data read from the rewound source
def source_to_content
  if source.nil?
    object.content
  elsif source.is_a?(String)
    # Hand back an open handle instead of the bytes: the file could be huge.
    File.open(source)
  else
    source.rewind
    source.read
  end
end
Assign values of the instance properties from the metadata mapping :prop => val
# File lib/hydra/works/services/characterization_service.rb, line 86
# Copies each term's value(s) onto the matching property of the object.
#
# @param terms term name => value (single value or collection)
# @return the terms that were passed in
def store_metadata(terms)
  terms.each_pair do |term, value|
    property = property_for(term)
    # Terms without a matching property are silently skipped.
    unless property.nil?
      # Array() lets single values and collections share one code path.
      Array(value).each { |item| append_property_value(property, item) }
    end
  end
end