class NewspaperWorks::IssuePDFComposer

Adapter class that composes a PDF derivative for an issue, if the issue requires one.

Constants

CMD_BASE

Attributes

issue[RW]
page_pdfs[RW]

Public Class Methods

new(issue) click to toggle source

@param issue [NewspaperIssue] adapts issue work object

# File lib/newspaper_works/issue_pdf_composer.rb, line 12
# Set up a composer for a single issue.
#
# @param issue [NewspaperIssue] the issue work object this composer adapts
def initialize(issue)
  # Paths to page PDFs; starts empty and is filled in by `compose`.
  @page_pdfs = []
  @issue = issue
end

Public Instance Methods

compose() click to toggle source
# File lib/newspaper_works/issue_pdf_composer.rb, line 18
# Build the issue-level PDF from its page PDFs, unless the issue already
#   has a PDF derivative.
#
# @return [nil] when an issue PDF already exists; otherwise whatever
#   `compose_from_pages` returns.
def compose
  # An existing PDF is never overwritten.
  unless issue_pdf_exists?
    # Every constituent page PDF has to exist and validate first;
    #   `validated_page_pdfs` raises when any page is not ready.
    @page_pdfs = validated_page_pdfs
    # Merge all paths in @page_pdfs into a single document and attach it.
    compose_from_pages
  end
end
compose_from_pages() click to toggle source
# File lib/newspaper_works/issue_pdf_composer.rb, line 29
# Merge the page PDFs listed in @page_pdfs into one issue PDF by running
#   the CMD_BASE command, validate the result, and attach it to the issue.
#
# The output is written into a freshly created temporary directory,
#   under the name given by `output_filename`.
#
# @return the result of `attach_to_issue` for the generated file
# @raise [NewspaperWorks::DataError] if the command exits unsuccessfully,
#   or if the generated file does not pass `validate_pdf`.
def compose_from_pages
  outfile = File.join(Dir.mktmpdir, output_filename)
  sources = @page_pdfs.join(' ')
  cmd = "#{CMD_BASE} -sOutputFile=#{outfile} #{sources}"
  # capture3 drains stdout and stderr while the command runs. Waiting for
  #   the process to exit before reading its pipes can block forever once
  #   the child has filled a pipe buffer.
  _stdout, stderr, status = Open3.capture3(cmd)
  raise NewspaperWorks::DataError, "Ghostscript Error: \n#{stderr}" unless status.success?
  # at this point, something should exist and validate at path `outfile`:
  raise NewspaperWorks::DataError, "Generated PDF invalid" unless validate_pdf(outfile)
  # Assign for attachment to issue, commit:
  attach_to_issue(outfile)
end
output_filename() click to toggle source
# File lib/newspaper_works/issue_pdf_composer.rb, line 47
# File name used for the composed issue PDF: the issue id followed by a
#   fixed `_full-issue.pdf` suffix.
#
# @return [String]
def output_filename
  format('%s_full-issue.pdf', @issue.id)
end
validate_pdf(path) click to toggle source

Validate PDF with poppler `pdfinfo` command, which will detect error conditions in cases like a truncated PDF, and only in those error conditions will write to stderr.

@param path [String] path to PDF file
@return [Boolean] true or false

# File lib/newspaper_works/issue_pdf_composer.rb, line 56
# Check whether the file at `path` looks like a usable PDF.
#
# A missing path, a missing file, or an empty file is invalid. Otherwise
#   `pdfinfo` is run on the file, and the file counts as valid only when
#   the command writes nothing to stderr.
#
# @param path [String, nil] path to PDF file
# @return [Boolean] true when the file exists, is non-empty, and
#   `pdfinfo` writes nothing to stderr for it.
def validate_pdf(path)
  return false if path.nil? || !File.exist?(path)
  return false if File.size(path).zero?
  # The path is passed as its own argument instead of being interpolated
  #   into a command string, so no shell is involved and spaces or shell
  #   metacharacters in the path cannot split or alter the command.
  #   capture3 also drains stdout, so the child never stalls on a full pipe.
  _stdout, stderr, _status = Open3.capture3('pdfinfo', path.to_s)
  # only zero bytes stderr output from `pdfinfo` considered valid PDF:
  stderr.size.zero?
end

Private Instance Methods

attach_to_issue(path) click to toggle source
# File lib/newspaper_works/issue_pdf_composer.rb, line 101
# Attach the file at `path` to the issue as a work file and commit it.
#
# @param path [String] path to the file to attach
# @return the result of `commit!` on the WorkFiles adapter
def attach_to_issue(path)
  # The system temp dir must be an allowed ingest location first.
  ensure_whitelist
  # WorkFiles creates the fileset; because primary file attachment runs
  #   through the actor stack, the FileSet's visibility is copied from
  #   the work.
  NewspaperWorks::Data::WorkFiles.of(@issue)
                                 .tap { |work_files| work_files.assign(path) }
                                 .commit!
end
derivatives_of(work) click to toggle source
# File lib/newspaper_works/issue_pdf_composer.rb, line 92
# Obtain the derivatives adapter for a work object.
#
# @param work the work (issue or page) whose derivatives are wanted
# @return the value of `NewspaperWorks::Data::WorkDerivatives.of(work)`
def derivatives_of(work)
  adapter_class = NewspaperWorks::Data::WorkDerivatives
  adapter_class.of(work)
end
ensure_whitelist() click to toggle source
# File lib/newspaper_works/issue_pdf_composer.rb, line 96
# Make sure the system temp directory is listed in Hyrax's whitelisted
#   ingest directories, appending it when it is absent.
#
# @return [Array, nil] the whitelist when the temp dir was appended,
#   nil when it was already present.
def ensure_whitelist
  tmp_root = Dir.tmpdir
  allowed_dirs = Hyrax.config.whitelisted_ingest_dirs
  return if allowed_dirs.include?(tmp_root)
  allowed_dirs.push(tmp_root)
end
issue_pdf_exists?() click to toggle source
# File lib/newspaper_works/issue_pdf_composer.rb, line 88
# Whether the issue already has a 'pdf' derivative.
#
# @return the result of `exist?('pdf')` on the issue's derivatives adapter
def issue_pdf_exists?
  issue_derivatives = derivatives_of(@issue)
  issue_derivatives.exist?('pdf')
end
validated_page_pdfs() click to toggle source

@return [Array] list of paths to page PDFs, in page order
@raise [NewspaperWorks::PagesNotReady] if any page has an invalid or non-ready PDF source.
# File lib/newspaper_works/issue_pdf_composer.rb, line 75
# Collect the PDF derivative path of every page of the issue, checking
#   each one with `validate_pdf` along the way.
#
# @return [Array] paths to page PDFs, in the order of `issue.pages`
# @raise [NewspaperWorks::PagesNotReady] at the first page whose PDF path
#   fails `validate_pdf`; the message names the issue id and page index.
def validated_page_pdfs
  issue.pages.to_a.each_with_index.map do |page, position|
    not_ready = "Page PDFs not ready for issue "\
      "(Issue id: #{issue.id}, Page index: #{position})"
    pdf_path = derivatives_of(page).path('pdf')
    # a single invalid page PDF aborts the whole collection:
    raise NewspaperWorks::PagesNotReady, not_ready unless validate_pdf(pdf_path)
    pdf_path
  end
end