class Moab::Bagger

A class used to create a BagIt package from a version inventory and a set of source files. The {#fill_bag} method is called with a package_mode parameter that specifies whether the bag is being created for deposit into the repository or is to contain the output of a version reconstruction.

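- In :depositor mode, the version inventory is filtered using the signature catalog so that only new files are included.
- In :reconstructor mode, the bag contains the complete set of files for the version.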

Data Model

@note Copyright © 2012 by The Board of Trustees of the Leland Stanford Junior University.

All rights reserved.  See {file:LICENSE.rdoc} for details.

Attributes

bag_inventory[RW]

@return [FileInventory] The actual inventory of the files to be packaged (derived from @version_inventory in {#fill_bag})

bag_pathname[RW]

@return [Pathname] The location of the Bagit bag to be created

package_mode[RW]

@return [Symbol] The operational mode controlling what gets bagged ({#fill_bag}) and the full path of source files ({#fill_payload})

signature_catalog[RW]

@return [SignatureCatalog] The signature catalog, used to specify source paths (in :reconstructor mode) or to filter the version inventory (in :depositor mode)

version_inventory[RW]

@return [FileInventory] The complete inventory of the files comprising a digital object version

Public Class Methods

new(version_inventory, signature_catalog, bag_pathname) click to toggle source

@param version_inventory [FileInventory] The complete inventory of the files comprising a digital object version

@param signature_catalog [SignatureCatalog] The signature catalog, used to specify source paths (in :reconstructor mode) or to filter the version inventory (in :depositor mode)

@param bag_pathname [Pathname,String] The location of the Bagit bag to be created

# File lib/moab/bagger.rb, line 26
# Remember the collaborators and make sure the bag location is initialized.
#
# @param version_inventory [FileInventory] inventory of the complete object version
# @param signature_catalog [SignatureCatalog] catalog consulted when the bag is filled
# @param bag_pathname [Pathname, String] bag location; always stored as a Pathname
def initialize(version_inventory, signature_catalog, bag_pathname)
  @bag_pathname = Pathname.new(bag_pathname)
  @signature_catalog = signature_catalog
  @version_inventory = version_inventory
  # Side effect: writes the bagit.txt declaration into the bag location right away.
  create_bagit_txt
end

Public Instance Methods

create_bag_info_txt() click to toggle source

@api internal @return [void] Generate the bag-info.txt tag file

# File lib/moab/bagger.rb, line 219
# Write the bag-info.txt tag file into the bag's root directory.
# The three lines (identifier, payload oxum, human-readable size) are taken
# from the current bag inventory.
#
# @api internal
# @return [void]
def create_bag_info_txt
  info_path = bag_pathname.join('bag-info.txt')
  info_path.open('w') do |io|
    io.puts "External-Identifier: #{bag_inventory.package_id}"
    # Payload-Oxum is "<total bytes>.<number of files>"
    io.puts "Payload-Oxum: #{bag_inventory.byte_count}.#{bag_inventory.file_count}"
    io.puts "Bag-Size: #{bag_inventory.human_size}"
  end
end
create_bag_inventory(package_mode) click to toggle source

@api external @param package_mode [Symbol] The operational mode controlling what gets bagged and the full path of

source files (Bagger#fill_payload)

@return [FileInventory] Create, write, and return the inventory of the files that will become the payload

# File lib/moab/bagger.rb, line 107
# Decide which inventory describes the bag payload, write the inventory XML
# file(s) into the bag directory, and return that inventory.
#
# @api external
# @param package_mode [Symbol] :depositor or :reconstructor
# @return [FileInventory] the inventory of the files that will become the payload
def create_bag_inventory(package_mode)
  @package_mode = package_mode
  bag_pathname.mkpath
  if package_mode == :depositor
    # Deposit bags carry the full version inventory plus the subset of files
    # the signature catalog reports as additions.
    version_inventory.write_xml_file(bag_pathname, 'version')
    @bag_inventory = signature_catalog.version_additions(version_inventory)
    bag_inventory.write_xml_file(bag_pathname, 'additions')
  elsif package_mode == :reconstructor
    # Reconstruction bags contain every file of the version.
    @bag_inventory = version_inventory
    bag_inventory.write_xml_file(bag_pathname, 'version')
  end
  # Any other mode leaves @bag_inventory untouched.
  bag_inventory
end
create_bagit_txt() click to toggle source

@api internal @return [void] Generate the bagit.txt tag file

# File lib/moab/bagger.rb, line 59
# Write the bagit.txt bag declaration, creating the bag directory first if
# it does not exist yet.
#
# @api internal
# @return [void]
def create_bagit_txt
  bag_pathname.mkpath
  declaration = bag_pathname.join('bagit.txt')
  declaration.open('w') do |io|
    # NOTE(review): the BagIt specification lists BagIt-Version before
    # Tag-File-Character-Encoding; the existing order is kept here — confirm
    # whether any consumer is sensitive to line order before changing it.
    io.puts 'Tag-File-Character-Encoding: UTF-8'
    io.puts 'BagIt-Version: 0.97'
  end
end
create_payload_manifests() click to toggle source

@api internal @return [void] Using the checksum information from the inventory, create BagIt manifest files for the payload

# File lib/moab/bagger.rb, line 189
# Write one manifest-<type>.txt per entry of DEFAULT_CHECKSUM_TYPES, using the
# checksums already recorded in the bag inventory. Each line has the form
# "<checksum> data/<group_id>/<instance path>". A manifest that ends up with
# no lines is removed again.
#
# @api internal
# @return [void]
def create_payload_manifests
  paths = {}
  handles = {}
  DEFAULT_CHECKSUM_TYPES.each do |type|
    paths[type] = bag_pathname.join("manifest-#{type}.txt")
    handles[type] = paths[type].open('w')
  end
  bag_inventory.groups.each do |group|
    group.files.each do |file|
      fixity = file.signature.fixity
      # A file may appear at several paths; every instance gets its own line.
      file.instances.each do |instance|
        payload_path = File.join('data', group.group_id, instance.path)
        DEFAULT_CHECKSUM_TYPES.each do |type|
          digest = fixity[type]
          handles[type].puts("#{digest} #{payload_path}") if digest
        end
      end
    end
  end
ensure
  # Close whatever was opened, even after an error, and drop empty manifests.
  DEFAULT_CHECKSUM_TYPES.each do |type|
    handle = handles[type]
    next unless handle

    handle.close
    path = paths[type]
    path.delete if path.exist? && path.empty?
  end
end
create_tagfile_manifests() click to toggle source

@api internal @return [void] create BagIt tag manifest files containing checksums for all files in the bag’s root directory

# File lib/moab/bagger.rb, line 229
# Write one tagmanifest-<type>.txt per entry of DEFAULT_CHECKSUM_TYPES with a
# "<checksum> <file name>" line for every regular file in the bag's root
# directory. Sub-directories and the tag manifests themselves are skipped,
# and a tag manifest that ends up empty is removed again.
#
# @api internal
# @return [void]
def create_tagfile_manifests
  paths = {}
  handles = {}
  DEFAULT_CHECKSUM_TYPES.each do |type|
    paths[type] = bag_pathname.join("tagmanifest-#{type}.txt")
    handles[type] = paths[type].open('w')
  end
  bag_pathname.children.each do |entry|
    # The tag manifests were just created above, so they show up here too.
    next if entry.directory? || entry.basename.to_s.start_with?('tagmanifest')

    fixity = FileSignature.new.signature_from_file(entry).fixity
    DEFAULT_CHECKSUM_TYPES.each do |type|
      digest = fixity[type]
      handles[type].puts("#{digest} #{entry.basename}") if digest
    end
  end
ensure
  # Close whatever was opened, even after an error, and drop empty manifests.
  DEFAULT_CHECKSUM_TYPES.each do |type|
    handle = handles[type]
    next unless handle

    handle.close
    path = paths[type]
    path.delete if path.exist? && path.empty?
  end
end
create_tagfiles() click to toggle source

@return [Boolean] create BagIt manifests and tag files. Return true if successful

# File lib/moab/bagger.rb, line 179
# Produce every manifest and tag file of the bag.
#
# @return [Boolean] true once all files have been written
def create_tagfiles
  create_payload_manifests
  create_bag_info_txt
  create_bagit_txt
  # Must run last: the tag manifests list the files already sitting in the
  # bag's root directory.
  create_tagfile_manifests

  true
end
create_tarfile(tar_pathname = nil) click to toggle source

@return [Boolean] Create a tar file containing the bag

# File lib/moab/bagger.rb, line 256
# Create a tar file containing the bag.
#
# @param tar_pathname [Pathname, String, nil] where to write the tar file;
#   defaults to "<bag name>.tar" next to the bag directory
# @return [Boolean] true when the tar file exists afterwards
# @raise [MoabRuntimeError] if no tar file exists after running tar
def create_tarfile(tar_pathname = nil)
  bag_name = bag_pathname.basename
  bag_parent = bag_pathname.parent
  # Coerce to Pathname so a String argument also works with the exist? check
  # below (previously a String raised NoMethodError after tar had already run).
  tar_pathname = Pathname.new(tar_pathname || bag_parent.join("#{bag_name}.tar"))
  # --dereference: archive the files that the payload symlinks point to.
  tar_cmd = "cd '#{bag_parent}'; tar --dereference --force-local -cf  '#{tar_pathname}' '#{bag_name}'"
  begin
    shell_execute(tar_cmd)
  rescue
    # Retry without --force-local if the first attempt fails for any reason.
    shell_execute(tar_cmd.sub('--force-local', ''))
  end
  raise(MoabRuntimeError, "Unable to create tarfile #{tar_pathname}") unless tar_pathname.exist?

  true
end
delete_bag() click to toggle source

@return [NilClass] Delete the bagit files

# File lib/moab/bagger.rb, line 68
# Remove the bag directory, but only if it looks like a bag (contains bagit.txt).
#
# @return [NilClass] always nil
def delete_bag
  # Safety check: never delete a directory that has no bag declaration.
  return nil unless bag_pathname.join('bagit.txt').exist?

  if bag_pathname.join('data').exist?
    bag_pathname.rmtree
  else
    # Without a payload directory, delete the direct children one by one and
    # then remove the (now empty) bag directory.
    bag_pathname.children.each(&:delete)
    bag_pathname.rmdir
  end
  nil
end
delete_tarfile() click to toggle source

@param tar_pathname [Pathname] The location of the tar file (default is based on bag location)

# File lib/moab/bagger.rb, line 82
# Delete the tar file of the bag, if it exists.
#
# @param tar_pathname [Pathname, String, nil] The location of the tar file;
#   defaults to "<bag name>.tar" next to the bag directory, mirroring the
#   default used by create_tarfile
# @return [Object, nil] the result of Pathname#delete, or nil when there is
#   no tar file to delete
def delete_tarfile(tar_pathname = nil)
  bag_name = bag_pathname.basename
  bag_parent = bag_pathname.parent
  # Accept the same optional argument as create_tarfile so that a tar file
  # written to a custom location can be removed as well.
  tar_pathname = Pathname.new(tar_pathname || bag_parent.join("#{bag_name}.tar"))
  tar_pathname.delete if tar_pathname.exist?
end
deposit_group(group_id, source_dir) click to toggle source

@param group_id [String] The name of the data group being copied to the bag @param source_dir [Pathname] The location from which files should be copied @return [Boolean] Copy all the files listed in the group inventory to the bag.

Return true if successful or nil if the group was not found in the inventory
# File lib/moab/bagger.rb, line 143
# Link every file of one data group from the source directory into the bag's
# data/<group_id> folder. Symbolic links are created instead of copies.
#
# @param group_id [String] The name of the data group being added to the bag
# @param source_dir [Pathname] The location the group's files are linked from
# @return [Boolean, nil] true if successful, nil if the group is missing from
#   the bag inventory or has no files
def deposit_group(group_id, source_dir)
  group = bag_inventory.group(group_id)
  # Was `return nil?`, which calls self.nil? and therefore returned false
  # instead of the documented nil.
  return if group.nil? || group.files.empty?

  target_dir = bag_pathname.join('data', group_id)
  group.path_list.each do |relative_path|
    source = source_dir.join(relative_path)
    target = target_dir.join(relative_path)
    target.parent.mkpath
    FileUtils.symlink source, target
  end
  true
end
fill_bag(package_mode, source_base_pathname) click to toggle source

@api external @param package_mode [Symbol] The operational mode controlling what gets bagged and the full path of

source files (Bagger#fill_payload)

@param source_base_pathname [Pathname] The home location of the source files @return [Bagger] Perform all the operations required to fill the bag payload, write the manifests and

tagfiles, and checksum the tagfiles

@example {include:file:spec/features/storage/deposit_spec.rb}

# File lib/moab/bagger.rb, line 96
# Run the whole bagging sequence: choose/write the inventory, populate the
# payload, then write manifests and tag files.
#
# @api external
# @param package_mode [Symbol] :depositor or :reconstructor
# @param source_base_pathname [Pathname] The home location of the source files
# @return [Bagger] self, so calls can be chained
def fill_bag(package_mode, source_base_pathname)
  tap do
    create_bag_inventory(package_mode)
    fill_payload(source_base_pathname)
    create_tagfiles
  end
end
fill_payload(source_base_pathname) click to toggle source

@api internal

@param source_base_pathname [Pathname] The home location of the source files

@return [void] Fill in the bag’s data folder with links to all files to be packaged for delivery. Rather than copying, the per-group helpers ({#deposit_group} and {#reconstuct_group}) create symbolic links to the source files (FileUtils.symlink), which greatly speeds up the process; {#create_tarfile} dereferences those links when it builds the tar file

# File lib/moab/bagger.rb, line 127
# Populate the bag's data folder, one inventory group at a time, by handing
# each group to the helper that matches the current package mode.
#
# @api internal
# @param source_base_pathname [Pathname] The home location of the source files
# @return [void]
def fill_payload(source_base_pathname)
  bag_inventory.groups.each do |group|
    id = group.group_id
    mode = package_mode
    if mode == :depositor
      # Deposits read each group from its own sub-directory of the source.
      deposit_group(id, source_base_pathname.join(id))
    elsif mode == :reconstructor
      reconstuct_group(id, source_base_pathname)
    end
  end
end
reconstuct_group(group_id, storage_object_dir) click to toggle source

@param group_id [String] The name of the data group being copied to the bag @param storage_object_dir [Pathname] The home location of the object store from which files should be copied @return [Boolean] Copy all the files listed in the group inventory to the bag.

Return true if successful or nil if the group was not found in the inventory
# File lib/moab/bagger.rb, line 161
# Link every file of one data group from the object store into the bag's
# data/<group_id> folder. The storage location of each file is looked up in
# the signature catalog by file signature; symbolic links are created instead
# of copies.
#
# @param group_id [String] The name of the data group being added to the bag
# @param storage_object_dir [Pathname] The home location of the object store
#   the catalog's storage paths are resolved against
# @return [Boolean, nil] true if successful, nil if the group is missing from
#   the bag inventory or has no files
def reconstuct_group(group_id, storage_object_dir)
  group = bag_inventory.group(group_id)
  # Was `return nil?`, which calls self.nil? and therefore returned false
  # instead of the documented nil.
  return if group.nil? || group.files.empty?

  target_dir = bag_pathname.join('data', group_id)
  group.files.each do |file|
    catalog_entry = signature_catalog.signature_hash[file.signature]
    source = storage_object_dir.join(catalog_entry.storage_path)
    # One stored file may be exposed under several paths in the version.
    file.instances.each do |instance|
      target = target_dir.join(instance.path)
      target.parent.mkpath
      FileUtils.symlink source, target unless target.exist?
    end
  end
  true
end
reset_bag() click to toggle source

@return [void] Delete any existing bag data and re-initialize the bag directory

# File lib/moab/bagger.rb, line 51
# Start over: remove the existing bag and its tar file, then write a fresh
# bagit.txt declaration.
#
# @return [void]
def reset_bag
  # Clear out whatever a previous run left behind ...
  delete_bag
  delete_tarfile

  # ... and re-initialize the bag location.
  create_bagit_txt
end
shell_execute(command) click to toggle source

Executes a system command in a subprocess. If the command isn’t successful, grabs stdout and stderr and puts them in the Ruby exception message.

@return stdout if execution was successful

# File lib/moab/bagger.rb, line 274
# Run a system command in a subprocess and hand back what it printed.
#
# @param command [String] the command line to run (a trailing newline is stripped)
# @return [String] the command's stdout when it exits with status 0
# @raise [MoabStandardError] when the command exits unsuccessfully (the
#   message carries stderr and any stdout) or cannot be started at all
def shell_execute(command)
  require 'open3'
  out, err, result = Open3.capture3(command.chomp)
  return out if result.success? && result.exitstatus.zero?

  failure = "Shell command failed: [#{command}] caused by <STDERR = #{err}>"
  failure << " STDOUT = #{out}" if out&.length&.positive?
  raise(MoabStandardError, failure)
rescue SystemCallError => e
  # Raised when the command could not be run at all (e.g. executable not found).
  raise(MoabStandardError, "Shell command failed: [#{command}] caused by #{e.inspect}")
end