class Moab::FileGroup

A container for a standard subset of a digital object's {FileManifestation} objects. Used to segregate depositor content from repository metadata files. This is a child element of {FileInventory}, which contains a full example.

Data Model

@note Copyright © 2012 by The Board of Trustees of the Leland Stanford Junior University.

All rights reserved.  See {file:LICENSE.rdoc} for details.

Attributes

base_directory[R]
signature_hash[RW]

@return [Hash<FileSignature, FileManifestation>] The actual in-memory store for the collection

of {FileManifestation} objects that are contained in this file group.

Public Class Methods

new(opts = {}) click to toggle source

(see Serializable#initialize)

Calls superclass method Serializer::Serializable::new
# File lib/moab/file_group.rb, line 24
# Start with an empty signature store and data source, then hand the options
# to the superclass initializer.
#
# @param opts [Hash] options forwarded unchanged to the superclass
def initialize(opts = {})
  @signature_hash = {}
  @data_source = ''
  # Assigned up front; prevents later warning: instance variable @signatures_from_bag not initialized
  @signatures_from_bag = nil
  super # bare super forwards opts exactly as received
end

Public Instance Methods

add_file(manifestation) click to toggle source

@api internal @param manifestation [FileManifestation] The file manifestation to be added @return [void] Add a single {FileManifestation} object to this group

# File lib/moab/file_group.rb, line 131
# Add every instance of a manifestation to this group, each one paired with
# the manifestation's signature via #add_file_instance.
#
# @api internal
# @param manifestation [FileManifestation] The file manifestation to be added
# @return [void]
def add_file(manifestation)
  manifestation.instances.each { |file_instance| add_file_instance(manifestation.signature, file_instance) }
end
add_file_instance(signature, instance) click to toggle source

@api internal @param signature [FileSignature] The signature of the file instance to be added @param instance [FileInstance] The pathname and datetime of the file instance to be added @return [void] Add a single {FileSignature},{FileInstance} key/value pair to this group.

Data is actually stored in the {#signature_hash}
# File lib/moab/file_group.rb, line 142
# Record one signature/instance pair in #signature_hash.
#
# When the signature has no manifestation yet, a new FileManifestation is
# created, given the signature, and stored before the instance is appended.
#
# @api internal
# @param signature [FileSignature] The signature of the file instance to be added
# @param instance [FileInstance] The pathname and datetime of the file instance to be added
# @return [void]
def add_file_instance(signature, instance)
  entry = signature_hash[signature]
  unless entry
    entry = FileManifestation.new
    entry.signature = signature
    signature_hash[signature] = entry
  end
  entry.instances << instance
end
add_physical_file(pathname, _validated = nil) click to toggle source

@api internal @param pathname [Pathname, String] The location of the file to be added @param _validated (unused; kept here for backwards compatibility) @return [void] Add a single physical file’s data to the array of files in this group.

If fixity data was supplied in bag manifests, then utilize that data.
# File lib/moab/file_group.rb, line 233
# Add a single physical file to this group.
#
# The signature comes from @signatures_from_bag when it has an entry for the
# expanded path (normalized against the file if it is not complete);
# otherwise it is computed from the file itself.
#
# @api internal
# @param pathname [Pathname, String] The location of the file to be added
# @param _validated (unused; kept here for backwards compatibility)
# @return [void]
def add_physical_file(pathname, _validated = nil)
  pathname = Pathname.new(pathname).expand_path
  instance = FileInstance.new.instance_from_file(pathname, @base_directory)
  bag_signature = @signatures_from_bag && @signatures_from_bag[pathname]
  signature =
    if !bag_signature
      FileSignature.new.signature_from_file(pathname)
    elsif bag_signature.complete?
      bag_signature
    else
      bag_signature.normalized_signature(pathname)
    end
  add_file_instance(signature, instance)
end
base_directory=(basepath) click to toggle source

@return [Pathname] The full path used as the basis of the relative paths reported

in {FileInstance} objects that are children of the {FileManifestation} objects contained in this file group
# File lib/moab/file_group.rb, line 162
# Set the base directory, stored as an expanded (absolute) Pathname.
#
# @param basepath [Pathname, String] the path to expand and store in @base_directory
def base_directory=(basepath)
  @base_directory = Pathname.new(basepath).expand_path
end
block_count() click to toggle source
# File lib/moab/file_group.rb, line 59
# Sum of +block_count+ over every manifestation returned by #files.
#
# @return [Integer] the total; 0 for an empty group (assumes each manifestation reports an Integer)
def block_count
  files.sum(&:block_count)
end
byte_count() click to toggle source
# File lib/moab/file_group.rb, line 51
# Sum of +byte_count+ over every manifestation returned by #files.
#
# @return [Integer] the total; 0 for an empty group (assumes each manifestation reports an Integer)
def byte_count
  files.sum(&:byte_count)
end
file_count() click to toggle source
# File lib/moab/file_group.rb, line 43
# Sum of +file_count+ over every manifestation returned by #files.
#
# @return [Integer] the total; 0 for an empty group (assumes each manifestation reports an Integer)
def file_count
  files.sum(&:file_count)
end
files() click to toggle source
# File lib/moab/file_group.rb, line 72
# The manifestations held by this group, i.e. the values of #signature_hash.
#
# @return [Array<FileManifestation>] a new array on each call
def files
  signature_hash.values
end
files=(manifestiation_array) click to toggle source

@param manifestiation_array [Array<FileManifestation>] The collection of {FileManifestation} objects

that are to be added to this file group.  Used by HappyMapper when deserializing a {FileInventory} file

Add the array of {FileManifestation} objects to this file group.

# File lib/moab/file_group.rb, line 122
# Add each manifestation in the array to this group via #add_file.
# Existing entries are kept; the array is added to, not substituted for, them.
#
# @param manifestiation_array [Array<FileManifestation>] The collection of {FileManifestation} objects
#   that are to be added to this file group
def files=(manifestiation_array)
  manifestiation_array.each { |manifestation| add_file(manifestation) }
end
group_from_bagit_subdir(directory, signatures_from_bag, recursive = true) click to toggle source

@param directory [Pathname,String] The directory whose children are to be added to the file group @param signatures_from_bag [Hash<Pathname,Signature>] The fixity data already calculated for the files @param recursive [Boolean] if true, descend into child directories @return [FileGroup] Harvest a directory (using digest hash for fixity data) and add all files to the file group

# File lib/moab/file_group.rb, line 186
# Harvest a directory into this group, reusing fixity data supplied by a bag.
#
# The supplied hash is stored in @signatures_from_bag before harvesting so that
# #add_physical_file can look up each file's signature by its expanded Pathname.
#
# @param directory [Pathname, String] The directory whose children are to be added to the file group
# @param signatures_from_bag [Hash] The fixity data already calculated for the files, keyed by Pathname
# @param recursive [Boolean] if true, descend into child directories
# @return [FileGroup] whatever #group_from_directory returns (self)
def group_from_bagit_subdir(directory, signatures_from_bag, recursive = true)
  @signatures_from_bag = signatures_from_bag
  group_from_directory(directory, recursive)
end
group_from_directory(directory, recursive = true) click to toggle source

@api internal @param directory [Pathname,String] The location of the files to harvest @param recursive [Boolean] if true, descend into child directories @return [FileGroup] Harvest a directory and add all files to the file group

# File lib/moab/file_group.rb, line 195
# Harvest a directory and add all of its files to this file group.
#
# Harvesting is best-effort: when an ordinary error (for example Errno::ENOENT
# for a missing directory) interrupts it, the group is still returned, with
# @data_source set to the directory exactly as it was given.
#
# @api internal
# @param directory [Pathname, String] The location of the files to harvest
# @param recursive [Boolean] if true, descend into child directories
# @return [FileGroup] self
def group_from_directory(directory, recursive = true)
  self.base_directory = directory
  @data_source = @base_directory.to_s
  harvest_directory(directory, recursive)
  self
rescue StandardError # e.g. Errno::ENOENT
  # Rescue StandardError rather than Exception so that Interrupt, SystemExit,
  # NoMemoryError and similar process-level signals still reach the caller.
  @data_source = directory.to_s
  self
end
harvest_directory(path, recursive, validated = nil) click to toggle source

@api internal @param path [Pathname,String] pathname of the directory to be harvested @param recursive [Boolean] if true, also harvest subdirectories @param validated [Boolean] if true, path is verified to be descendant of (base_directory) @return [void] Traverse a directory tree and add all files to the file group

Note that unlike Find.find and Dir.glob, Pathname passes through symbolic links

@see stackoverflow.com/questions/3974087/how-to-make-rubys-find-find-follow-symlinks @see stackoverflow.com/questions/357754/can-i-traverse-symlinked-directories-in-ruby-with-a-glob

# File lib/moab/file_group.rb, line 213
# Walk a directory and pass every file found to #add_physical_file.
#
# Entries are visited in sorted order and any entry named '.DS_Store' is
# skipped. Subdirectories are entered only when +recursive+ is truthy.
#
# @api internal
# @param path [Pathname, String] pathname of the directory to be harvested
# @param recursive [Boolean] if true, also harvest subdirectories
# @param validated [Boolean] if truthy, the descendant-of-base check is skipped;
#   otherwise #is_descendent_of_base? is called on the expanded path
# @return [void]
def harvest_directory(path, recursive, validated = nil)
  directory = Pathname.new(path).expand_path
  validated = is_descendent_of_base?(directory) unless validated
  directory.children.sort.each do |entry|
    next if entry.basename.to_s == '.DS_Store'

    if !entry.directory?
      add_physical_file(entry, validated)
    elsif recursive
      # Carry the validation result down so it is not recomputed per level.
      harvest_directory(entry, recursive, validated)
    end
  end
  nil
end
is_descendent_of_base?(pathname) click to toggle source

FIXME: shouldn’t this method be named descendent_of_base? @api internal @param pathname [Pathname] The file path to be tested @return [Boolean] Test whether the given path is contained within the {#base_directory}

# File lib/moab/file_group.rb, line 171
# Check that a path is @base_directory itself or lies somewhere beneath it.
#
# The expanded path and each of its ancestors are compared with
# @base_directory. The method never returns false: a path outside the base
# raises instead.
#
# @api internal
# @param pathname [Pathname] The file path to be tested
# @return [Boolean] true when the path is contained within the base directory
# @raise [MoabRuntimeError] if @base_directory is nil, or if the path is not a descendent of it
def is_descendent_of_base?(pathname)
  raise(MoabRuntimeError, 'base_directory has not been set') if @base_directory.nil?

  inside_base = pathname.expand_path.ascend.any? { |ancestor| ancestor == @base_directory }
  return true if inside_base

  # FIXME:  shouldn't it simply return false?
  raise(MoabRuntimeError, "#{pathname} is not a descendent of #{@base_directory}")
end
path_hash() click to toggle source

@api internal @return [Hash<String,FileSignature>] An index of file paths,

used to test for existence of a filename in this file group
# File lib/moab/file_group.rb, line 83
# Build an index from each instance path to the signature of the
# manifestation that holds it, walking every entry of #signature_hash.
#
# @api internal
# @return [Hash<String,FileSignature>] An index of file paths,
#   used to test for existence of a filename in this file group
def path_hash
  signature_hash.each_with_object({}) do |(signature, manifestation), index|
    manifestation.instances.each { |instance| index[instance.path] = signature }
  end
end
path_hash_subset(signature_subset) click to toggle source

@api internal @param signature_subset [Array<FileSignature>] The signatures used to select the entries to return @return [Hash<String,FileSignature>] A pathname,signature hash containing a subset of the filenames in this file

group, e.g., {"intro-1.jpg"=>#<Moab::FileSignature>, ...}
# File lib/moab/file_group.rb, line 102
# Build a path => signature index restricted to the given signatures.
#
# Entries of #signature_hash whose signature is not in +signature_subset+ are
# skipped; for the rest, every instance path is mapped to that signature,
# e.g. {"intro-1.jpg"=>#<Moab::FileSignature>, ...}
#
# @api internal
# @param signature_subset [Array<FileSignature>] The signatures used to select the entries to return
# @return [Hash<String,FileSignature>] A pathname,signature hash containing a subset of the filenames in this file group
def path_hash_subset(signature_subset)
  signature_hash.each_with_object({}) do |(signature, manifestation), subset|
    next unless signature_subset.include?(signature)

    manifestation.instances.each { |instance| subset[instance.path] = signature }
  end
end
path_list() click to toggle source

@return [Array<String>] The list of file paths in this group

# File lib/moab/file_group.rb, line 94
# Collect the path of every instance of every manifestation in #files,
# in manifestation order.
#
# @return [Array<String>] The list of file paths in this group
def path_list
  files.flat_map { |manifestation| manifestation.instances.map(&:path) }
end
remove_file_having_path(path) click to toggle source

@param path [String] The path of the file to be removed @return [void] Remove a file from the inventory. For example, the manifest inventory does not contain a file entry for itself.

# File lib/moab/file_group.rb, line 155
# Remove the file with the given path from this group.
#
# Only the instance whose path matches is removed. Other paths that share the
# same signature (identical content stored under a different name) stay in
# the group; the manifestation is deleted from #signature_hash only once it
# has no instances left. An unknown path is a no-op.
#
# @param path [String] The path of the file to be removed
# @return [void] callers should not rely on the value (the affected
#   manifestation, or nil when the path is not in the group)
def remove_file_having_path(path)
  signature = path_hash[path]
  return if signature.nil?

  manifestation = signature_hash[signature]
  manifestation.instances.delete_if { |instance| instance.path == path }
  signature_hash.delete(signature) if manifestation.instances.empty?
  manifestation
end
summary_fields() click to toggle source

@return [Array<String>] The data fields to include in summary reports

# File lib/moab/file_group.rb, line 64
# Names of the data fields to include in summary reports.
#
# @return [Array<String>] a new array of the four field names on each call
def summary_fields
  %w[group_id file_count byte_count block_count]
end