class Moab::FileInventory

A structured container for recording information about a collection of related files.

The scope of the file collection depends on the inventory type, which is one of: version, additions, manifests or directory (see xml_filename).

The inventory contains one or more {FileGroup} subsets, which are most commonly used to provide segregation of a digital object version’s content and metadata files. Each group contains one or more {FileManifestation} entities, each of which represents a point-in-time snapshot of a given file’s filesystem characteristics. The fixity data for a file is stored in a {FileSignature} element, while the filename and modification data are stored in one or more {FileInstance} elements. (Copies of a given file may be present in multiple locations in a collection.)

Data Model

@example {include:file:spec/fixtures/derivatives/manifests/v3/versionInventory.xml} @note Copyright © 2012 by The Board of Trustees of the Leland Stanford Junior University.

All rights reserved.  See {file:LICENSE.rdoc} for details.

Public Class Methods

new(opts = {}) click to toggle source

(see Serializable#initialize)

Calls superclass method Serializer::Serializable::new
# File lib/moab/file_inventory.rb, line 37
# Set up an empty inventory and then hand the options to the superclass.
#
# The defaults are assigned before +super+ runs; presumably the superclass
# initializer applies +opts+ on top of them (see Serializable#initialize) --
# TODO confirm against the superclass.
#
# @param opts [Hash] options forwarded unchanged to the superclass initializer
def initialize(opts = {})
  @inventory_datetime = Time.now
  @groups = []
  # Bare `super` forwards this method's own arguments, i.e. `opts`.
  super
end
xml_filename(type = nil) click to toggle source

@api internal @param type [String] Specifies the type of inventory, and thus the filename used for storage @return [String] The standard name for the serialized inventory file of the given type

# File lib/moab/file_inventory.rb, line 248
# Map an inventory type to the standard filename of its serialized form.
#
# @api internal
# @param type [String] the inventory type: 'version', 'additions', 'manifests' or 'directory'
# @return [String] the standard name for the serialized inventory file of the given type
# @raise [ArgumentError] if +type+ is not one of the recognized types (including the default, nil)
def self.xml_filename(type = nil)
  filename = {
    'version' => 'versionInventory.xml',
    'additions' => 'versionAdditions.xml',
    'manifests' => 'manifestInventory.xml',
    'directory' => 'directoryInventory.xml'
  }[type]
  raise ArgumentError, "unknown inventory type: #{type}" if filename.nil?

  filename
end

Public Instance Methods

block_count() click to toggle source
# File lib/moab/file_inventory.rb, line 92
# Total the +block_count+ reported by every group in this inventory.
#
# @return [Numeric] the sum of each group's block_count; 0 when there are no groups
def block_count
  groups.sum(&:block_count)
end
byte_count() click to toggle source
# File lib/moab/file_inventory.rb, line 84
# Total the +byte_count+ reported by every group in this inventory.
#
# @return [Numeric] the sum of each group's byte_count; 0 when there are no groups
def byte_count
  groups.sum(&:byte_count)
end
composite_key() click to toggle source

@return [String] The unique identifier concatenating digital object id with version id

# File lib/moab/file_inventory.rb, line 56
# Build an identifier from the digital object id and the version directory name.
#
# @return [String] the digital object id, a hyphen, and the result of
#   StorageObject.version_dirname for this inventory's version_id
def composite_key
  object_part = digital_object_id
  version_part = StorageObject.version_dirname(version_id)
  "#{object_part}-#{version_part}"
end
copy_ids(other) click to toggle source

@api internal @param other [FileInventory] another instance of this class from which to clone identity values @return [void] Copy objectId and versionId values from another class instance into this instance

# File lib/moab/file_inventory.rb, line 148
# Copy the identity values (object id, version id, inventory datetime) from
# another inventory into this one.
#
# Note that the datetime is stored exactly as returned by the other instance's
# +inventory_datetime+ reader; it does not go through this class's
# +inventory_datetime=+ writer.
#
# @api internal
# @param other [FileInventory] another instance of this class from which to clone identity values
# @return [void] callers are not expected to use the return value
def copy_ids(other)
  source = other
  @digital_object_id = source.digital_object_id
  @version_id = source.version_id
  @inventory_datetime = source.inventory_datetime
end
data_source() click to toggle source

@api internal @return [String] Returns either the version ID (if inventory is a version manifest) or the name of the directory

that was harvested to create the inventory
# File lib/moab/file_inventory.rb, line 163
# Describe where this inventory's data came from.
#
# The data_source of every group is stringified and joined with '|'. When a
# version_id is set the result is "v<version_id>"; otherwise it is the joined
# string. If the joined string starts with 'contentMetadata' it is appended
# after a hyphen, using "new" as the prefix when there is no version_id.
#
# @api internal
# @return [String] either a version label or the joined group data sources, as described above
def data_source
  harvested = groups.map { |file_group| file_group.data_source.to_s }.join('|')
  label = version_id ? "v#{version_id}" : nil
  return label || harvested unless harvested.start_with?('contentMetadata')

  "#{label || 'new'}-#{harvested}"
end
file_count() click to toggle source
# File lib/moab/file_inventory.rb, line 76
# Total the +file_count+ reported by every group in this inventory.
#
# @return [Numeric] the sum of each group's file_count; 0 when there are no groups
def file_count
  groups.sum(&:file_count)
end
file_signature(group_id, file_id) click to toggle source

@param [String] group_id The identifier of the group to be selected @param [String] file_id The group-relative path of the file (relative to the appropriate home directory) @return [FileSignature] The signature of the specified file

# File lib/moab/file_inventory.rb, line 133
# Look up the signature of one file within one group of this inventory.
#
# @param group_id [String] the identifier of the group to be selected
# @param file_id [String] the key of the file in the group's path_hash
# @return [FileSignature] the entry stored under +file_id+ in the group's path_hash
# @raise [FileNotFoundException] if the group does not exist, or if the group has no entry for +file_id+
def file_signature(group_id, file_id)
  file_group = group(group_id)
  if file_group.nil?
    raise FileNotFoundException, "group #{group_id} not found for #{digital_object_id} - #{version_id}"
  end

  signature = file_group.path_hash[file_id]
  return signature unless signature.nil?

  raise FileNotFoundException, "#{group_id} file #{file_id} not found for #{digital_object_id} - #{version_id}"
end
group(group_id) click to toggle source

@param [String] group_id The identifier of the group to be selected @return [FileGroup] The file group in this inventory for the specified group_id

# File lib/moab/file_inventory.rb, line 114
# Find the first group whose group_id equals the requested one.
#
# @param group_id [String] the identifier of the group to be selected
# @return [FileGroup, nil] the first matching group, or nil when none matches
def group(group_id)
  groups.each do |candidate|
    return candidate if candidate.group_id == group_id
  end
  nil
end
group_empty?(group_id) click to toggle source

@param group_id [String] File group identifier (e.g. data, metadata, manifests) @return [Boolean] true if the group is missing or empty

# File lib/moab/file_inventory.rb, line 120
# Report whether a group is absent from this inventory or has no files.
#
# @param group_id [String] file group identifier (e.g. data, metadata, manifests)
# @return [Boolean] true if no group has that id, or if the group's files collection is empty
def group_empty?(group_id)
  found = group(group_id)
  return true if found.nil?

  found.files.empty?
end
group_ids(non_empty = nil) click to toggle source

@param non_empty [Boolean] if true, return group_id’s only for groups having files @return [Array<String>] group identifiers contained in this file inventory

# File lib/moab/file_inventory.rb, line 107
# List the identifiers of the groups in this inventory.
#
# @param non_empty [Boolean] if truthy, return group ids only for the groups returned by non_empty_groups
# @return [Array<String>] group identifiers contained in this file inventory
def group_ids(non_empty = nil)
  return non_empty_groups.map(&:group_id) if non_empty

  groups.map(&:group_id)
end
human_size() click to toggle source

@api internal @return [String] The total size of the inventory expressed in B, KB, MB, GB or TB, depending on the magnitude of the value

# File lib/moab/file_inventory.rb, line 229
# Render byte_count as a human-readable size.
#
# The value is divided by 1024 until it drops below 1024 or the largest unit
# (TB) is reached. Sizes under 1024 are printed as whole bytes ("%d B");
# anything larger is printed with two decimals and its unit.
#
# @api internal
# @return [String] the total size of the inventory expressed in B, KB, MB, GB or TB
def human_size
  units = %w[B KB MB GB TB]
  scaled = byte_count
  exponent = 0
  until scaled < 1024 || exponent >= 4
    scaled /= 1024.0
    exponent += 1
  end
  return format('%d B', scaled) if exponent.zero?

  # rubocop:disable Style/FormatStringToken
  format('%.2f %s', scaled, units[exponent])
  # rubocop:enable Style/FormatStringToken
end
inventory_datetime() click to toggle source
# File lib/moab/file_inventory.rb, line 68
# Read the inventory timestamp.
#
# @return [Object] the stored @inventory_datetime after being passed through Moab::UtcTime.output
def inventory_datetime
  stored = @inventory_datetime
  Moab::UtcTime.output(stored)
end
inventory_datetime=(datetime) click to toggle source
# File lib/moab/file_inventory.rb, line 64
# Store the inventory timestamp.
#
# @param datetime [Object] the value to record; it is converted by Moab::UtcTime.input before being stored
def inventory_datetime=(datetime)
  converted = Moab::UtcTime.input(datetime)
  @inventory_datetime = converted
end
inventory_from_bagit_bag(bag_dir) click to toggle source

@param bag_dir [Pathname,String] The location of the BagIt bag to be inventoried @return [FileInventory] Traverse a BagIt bag’s payload and return an inventory of the files it contains

(using fixity from bag manifest files)
# File lib/moab/file_inventory.rb, line 192
# Populate this inventory from the payload of a BagIt bag.
#
# Fixity data is read once via signatures_from_bagit_manifests; then one
# FileGroup is appended to +groups+ for every entry directly under the bag's
# "data" directory, using the entry's basename as the group id.
#
# @param bag_dir [Pathname,String] the location of the BagIt bag to be inventoried
# @return [FileInventory] self, with the new groups appended
def inventory_from_bagit_bag(bag_dir)
  bag_root = Pathname(bag_dir)
  fixity = signatures_from_bagit_manifests(bag_root)
  bag_root.join('data').each_child do |payload_entry|
    group_name = payload_entry.basename.to_s
    groups << FileGroup.new(group_id: group_name).group_from_bagit_subdir(payload_entry, fixity)
  end
  self
end
inventory_from_directory(data_dir, group_id = nil) click to toggle source

@api external @param data_dir [Pathname,String] The location of files to be inventoried @param group_id [String] if specified, is used to set the group ID of the FileGroup created from the directory

if nil, then the directory is assumed to contain both content and metadata subdirectories

@return [FileInventory] Traverse a directory and return an inventory of the files it contains @example {include:file:spec/features/inventory/harvest_inventory_spec.rb}

# File lib/moab/file_inventory.rb, line 178
# Populate this inventory by harvesting a directory.
#
# With a +group_id+, a single FileGroup with that id is built from +data_dir+
# itself. Without one, two groups ("content" and "metadata") are built from
# the subdirectories of the same names under +data_dir+.
#
# @api external
# @param data_dir [Pathname,String] the location of files to be inventoried
# @param group_id [String] if specified, is used to set the group ID of the FileGroup created from the directory;
#   if nil, the directory is assumed to contain both content and metadata subdirectories
# @return [FileInventory] self, with the new group(s) appended
def inventory_from_directory(data_dir, group_id = nil)
  harvest_targets =
    if group_id
      [[group_id, data_dir]]
    else
      %w[content metadata].map { |gid| [gid, Pathname(data_dir).join(gid)] }
    end
  harvest_targets.each do |gid, directory|
    groups << FileGroup.new(group_id: gid).group_from_directory(directory)
  end
  self
end
non_empty_groups() click to toggle source

@return [Array<FileGroup>] The set of data groups that contain files

# File lib/moab/file_inventory.rb, line 101
# Select the groups that contain at least one file.
#
# @return [Array<FileGroup>] the groups whose files collection is not empty
def non_empty_groups
  groups.reject do |file_group|
    file_group.files.empty?
  end
end
package_id() click to toggle source

@api internal @return [String] Concatenation of the objectId and versionId values

# File lib/moab/file_inventory.rb, line 156
# Build a package identifier from the object id and version id.
#
# @api internal
# @return [String] the digital object id, then "-v", then the version id
def package_id
  object_part = digital_object_id
  version_part = version_id
  "#{object_part}-v#{version_part}"
end
signatures_from_bagit_manifests(bag_pathname) click to toggle source

@param bag_pathname [Pathname] The location of the BagIt bag to be inventoried @return [Hash<Pathname,FileSignature>] The fixity data present in the bag’s manifest files

# File lib/moab/file_inventory.rb, line 204
# Collect fixity data from a bag's manifest files.
#
# For each checksum type in DEFAULT_CHECKSUM_TYPES, the file
# "manifest-<type>.txt" in the bag (if it exists) is read line by line. Each
# line is split into a checksum and a bag-relative path (whitespace, optionally
# followed by '*' characters, is the separator); lines that do not yield both
# parts are ignored. Checksums of different types for the same path accumulate
# on a single FileSignature. Finally every signature's size is set from the
# size of the file it refers to.
#
# @param bag_pathname [Pathname] the location of the BagIt bag to be inventoried
# @return [Hash<Pathname,FileSignature>] the fixity data present in the bag's manifest files
def signatures_from_bagit_manifests(bag_pathname)
  # A missing key lazily creates (and stores) an empty signature for that path.
  signatures = Hash.new { |hash, path| hash[path] = FileSignature.new }
  DEFAULT_CHECKSUM_TYPES.each do |type|
    manifest = bag_pathname.join("manifest-#{type}.txt")
    next unless manifest.exist?

    manifest.each_line do |raw_line|
      checksum, data_path = raw_line.chomp.split(/\s+\**/, 2)
      next unless checksum && data_path

      signatures[bag_pathname.join(data_path)].set_checksum(type, checksum)
    end
  end
  signatures.each { |listed_file, signature| signature.size = listed_file.size }
  signatures
end
summary_fields() click to toggle source

@return [Array<String>] The data fields to include in summary reports

# File lib/moab/file_inventory.rb, line 126
# Name the attributes that summary reports should include.
#
# @return [Array<String>] the data fields to include in summary reports
def summary_fields
  %w[
    type digital_object_id version_id inventory_datetime
    file_count byte_count block_count groups
  ]
end
write_xml_file(parent_dir, type = nil) click to toggle source

@api external @param parent_dir [Pathname,String] The parent directory in which the xml file is to be stored @param type [String] The inventory type, which governs the filename used for serialization @return [void] write the {FileInventory} instance to a file @example {include:file:spec/features/inventory/write_inventory_xml_spec.rb}

# File lib/moab/file_inventory.rb, line 268
# Write this inventory to an XML file by delegating to the class-level
# +write_xml_file+.
#
# @api external
# @param parent_dir [Pathname,String] the parent directory in which the xml file is to be stored
# @param type [String] the inventory type, which governs the filename used for serialization;
#   when nil, this instance's @type is used instead
# @return [void] callers are not expected to use the return value
def write_xml_file(parent_dir, type = nil)
  effective_type = type.nil? ? @type : type
  self.class.write_xml_file(self, parent_dir, effective_type)
end