class Docsplit::PageExtractor

Delegates to pdftailor (or to pdftk, as a fallback) in order to burst a PDF document into single-page files.

Public Instance Methods

extract(pdfs, opts) click to toggle source

Burst a list of PDFs into single pages, named `pdfname_pagenumber.pdf`.

# File lib/docsplit/page_extractor.rb, line 8
# Burst each PDF into single-page files named "<pdfname>_<page>.pdf"
# inside the output directory.
#
# pdfs - a single path or an (arbitrarily nested) list of paths.
# opts - options hash handed to extract_options; its :output entry
#        selects the destination directory.
#
# Returns the flattened list of input paths.
# Raises ExtractionFailed, carrying the tool's combined stdout/stderr,
# when the external command exits with a non-zero status.
def extract(pdfs, opts)
  extract_options opts
  [pdfs].flatten.each do |pdf|
    pdf_name = File.basename(pdf, File.extname(pdf))
    # The "_%d.pdf" page template is appended after escaping so it stays literal.
    page_path = ESCAPE[File.join(@output, pdf_name)] + "_%d.pdf"
    # mkdir_p is a no-op when the directory already exists.
    FileUtils.mkdir_p @output

    cmd = if DEPENDENCIES[:pdftailor] # prefer pdftailor, but keep pdftk for backwards compatibility
      "pdftailor unstitch --output #{page_path} #{ESCAPE[pdf]} 2>&1"
    else
      "pdftk #{ESCAPE[pdf]} burst output #{page_path} 2>&1"
    end
    result = `#{cmd}`.chomp
    # Capture the exit status right away, before any later call can replace $?.
    status = $?
    # Clean up the stray report file in the working directory
    # (presumably left behind by pdftk's burst -- TODO confirm).
    FileUtils.rm('doc_data.txt') if File.exist?('doc_data.txt')
    raise ExtractionFailed, result unless status.success?
  end
end

Private Instance Methods

extract_options(options) click to toggle source
# File lib/docsplit/page_extractor.rb, line 30
# Remember the destination directory for extracted pages.
#
# options - hash-like object; a truthy :output entry names the target
#           directory, otherwise the current directory ('.') is used.
#
# Returns the directory stored in @output.
def extract_options(options)
  requested_dir = options[:output]
  @output = requested_dir ? requested_dir : '.'
end