class NewspaperWorks::Ingest::NDNP::PageIngester

Constants

COPY_FIELDS
COPY_FIELDS_PLURALIZE

Attributes

issue[RW]
opts[RW]
page[RW]
target[RW]

Public Class Methods

new(page, issue, opts = {}) click to toggle source

@param page [NewspaperWorks::Ingest::NDNP::PageIngest]

source page data

@param issue [NewspaperIssue]

source issue data

@param opts [Hash]

ingest options, e.g. administrative metadata
# File lib/newspaper_works/ingest/ndnp/page_ingester.rb, line 31
# Set up an ingester for a single page of an issue.
#
# @param page [NewspaperWorks::Ingest::NDNP::PageIngest] source page data
# @param issue [NewspaperIssue] source issue data
# @param opts [Hash] ingest options, e.g. administrative metadata
def initialize(page, issue, opts = {})
  @page, @issue, @opts = page, issue, opts
  # Neither the to-be-created NewspaperPage (target) nor its WorkFiles
  #   adapter exists until construct_page / ingest_page_files run:
  @target = @work_files = nil
  configure_logger('ingest')
end

Public Instance Methods

construct_page() click to toggle source
# File lib/newspaper_works/ingest/ndnp/page_ingester.rb, line 47
# Create the NewspaperPage work for this page, populate it, and save it.
#
# The work is created with only its title; page metadata, administrative
# metadata and the issue link are applied afterwards, followed by a
# single save!. Both the creation and the save are logged.
def construct_page
  @target = NewspaperPage.create!(title: page_title)
  created_notice = "Created NewspaperPage work #{@target.id} " \
                   "with title '#{@target.title[0]}'"
  write_log(created_notice)

  # Populate the freshly created work before persisting it again:
  copy_page_metadata
  assign_administrative_metadata
  link_issue

  @target.save!
  write_log("Saved metadata to NewspaperPage work #{@target.id}")
end
ingest() click to toggle source
# File lib/newspaper_works/ingest/ndnp/page_ingester.rb, line 41
# Run the ingest for this page, in order: build and save the
# NewspaperPage work (construct_page), attach its files
# (ingest_page_files), then call link_reel.
# NOTE(review): link_reel is not defined in this view; presumably it
#   associates the page with its reel -- confirm.
def ingest
  construct_page
  ingest_page_files
  link_reel
end
ingest_page_files() click to toggle source

Ingest primary, derivative files; other derivatives including

thumbnail, plain-text, json will be made by NewspaperWorks
derivative service components as a consequence of committing
files assigned (via actor stack, via WorkFiles).
# File lib/newspaper_works/ingest/ndnp/page_ingester.rb, line 64
# Assign every file listed for the page to the target work, then commit.
#
# Files whose (case-insensitive) extension is tif/tiff are assigned as
# the primary file; every other file is assigned as a derivative.
# Attachment itself happens when WorkFiles#commit! is called at the end.
#
# @return whatever WorkFiles#commit! returns
def ingest_page_files
  @work_files = NewspaperWorks::Data::WorkFiles.new(@target)
  page.files.each do |file_path|
    # text after the last '.', lower-cased:
    case file_path.downcase.split('.').last
    when 'tif', 'tiff' then ingest_primary_file(file_path)
    else ingest_derivative_file(file_path)
    end
  end
  notice = "Beginning file attachment process (WorkFiles.commit!) " \
           "for work #{@target.id}"
  write_log(notice)
  @work_files.commit!
end

Private Instance Methods

copy_page_metadata() click to toggle source
# File lib/newspaper_works/ingest/ndnp/page_ingester.rb, line 144
# Copy each field named in COPY_FIELDS from the source page metadata
# onto the target NewspaperPage work.
#
# Fields also listed in COPY_FIELDS_PLURALIZE are wrapped in a
# one-element Array before assignment; all others are assigned as-is.
def copy_page_metadata
  source = page.metadata
  COPY_FIELDS.each do |field|
    copied = source.send(field.to_s)
    # single source value, but the target attribute takes a list:
    copied = [copied] if COPY_FIELDS_PLURALIZE.include?(field)
    @target.send("#{field}=", copied)
  end
end
ingest_derivative_file(path) click to toggle source
# File lib/newspaper_works/ingest/ndnp/page_ingester.rb, line 105
# Log, then assign, a derivative file for the target work.
#
# @param path [String] path to the derivative file
# @return whatever the derivatives adapter's assign returns
def ingest_derivative_file(path)
  notice = "Assigned derivative file to work #{@target.id}, #{path}"
  write_log(notice)
  derivatives = @work_files.derivatives
  derivatives.assign(path)
end
ingest_primary_file(path) click to toggle source
# File lib/newspaper_works/ingest/ndnp/page_ingester.rb, line 95
# Log, then assign, the primary (TIFF) file for the target work.
#
# When the given TIFF path does not exist on disk, a TIFF is generated
# (via make_tiff) from the first PDF listed among the page's files. The
# PDF is matched case-insensitively, consistent with the
# case-insensitive extension check in ingest_page_files.
#
# @param path [String] path to the primary TIFF file
# @return whatever WorkFiles#assign returns
# @raise [IOError] if the TIFF is missing and the page lists no PDF to
#   generate a replacement from
def ingest_primary_file(path)
  unless File.exist?(path)
    pdf_path = page.files.find { |candidate| candidate.downcase.end_with?('pdf') }
    # Fail with a clear message here rather than handing nil to make_tiff:
    unless pdf_path
      raise IOError,
            "Primary file #{path} does not exist, and no PDF is available " \
            "to generate a TIFF for work #{@target.id}"
    end
    # make and get TIFF path (to generated tmp file):
    path = make_tiff(pdf_path)
  end
  write_log("Assigned primary file to work #{@target.id}, #{path}")
  @work_files.assign(path)
end
make_tiff(pdf_path) click to toggle source

Generate TIFF in temporary file, return its path, given path to PDF

@param pdf_path [String]

path to single-page PDF

@return [String]

path to generated TIFF

# File lib/newspaper_works/ingest/ndnp/page_ingester.rb, line 127
# Generate a TIFF from a PDF, standing in for a missing primary TIFF.
#
# Logs a warning, makes sure the system temp directory is present in the
# ingest-directory whitelist (appending it in place if absent), and
# returns the first entry produced by PdfPages for the given PDF.
#
# @param pdf_path [String] path to single-page PDF
# @return [String] path to generated TIFF
def make_tiff(pdf_path)
  notice = "Creating TIFF from PDF in lieu of missing for work " \
           " (#{@target.id})"
  write_log(notice, Logger::WARN)

  tmp_dir = Dir.tmpdir
  allowed_dirs = whitelist
  allowed_dirs.push(tmp_dir) unless allowed_dirs.include?(tmp_dir)

  NewspaperWorks::Ingest::PdfPages.new(pdf_path).to_a.first
end
page_title() click to toggle source

Page title, composed of the issue title plus the page number

e.g. "ACME Tribune (1910-01-02): Page 2"

@return [String] composed page title

# File lib/newspaper_works/ingest/ndnp/page_ingester.rb, line 140
# Compose the page title from the first issue title and the page number,
# e.g. "ACME Tribune (1910-01-02): Page 2".
#
# @return [Array<String>] one-element list holding the composed title
def page_title
  issue_title = issue.title.first
  page_number = @page.metadata.page_number
  ["#{issue_title}: Page #{page_number}"]
end
whitelist() click to toggle source

Directory whitelist: the ingest directories listed in the Hyrax configuration

# File lib/newspaper_works/ingest/ndnp/page_ingester.rb, line 120
# Ingest-directory list from the Hyrax configuration.
#
# Uses +registered_ingest_dirs+ when the configuration object responds to
# it, and falls back to +whitelisted_ingest_dirs+ otherwise, so the
# ingester works whichever of the two accessors is available.
# NOTE(review): the rename to registered_ingest_dirs in newer Hyrax is
#   from memory -- confirm against the Hyrax version in use.
#
# @return [Array] the configured directory list; make_tiff appends the
#   temp directory to this same object
def whitelist
  config = Hyrax.config
  return config.registered_ingest_dirs if config.respond_to?(:registered_ingest_dirs)

  config.whitelisted_ingest_dirs
end