class Gaspar::Extractor

Extracts data from all pages of a PDF.

Public Class Methods

new(source, target, pages, options) click to toggle source
# File lib/gaspar.rb, line 69
# Stores the extraction parameters on the instance; nothing else is done here.
#
# @param source  value later interpolated after the `-i` flag
# @param target  value later interpolated after the `-o` flag and read back by #content
# @param pages   number of pages; one `-p` flag is generated per page
# @param options object indexed with `:format` when the command line is built
def initialize(source, target, pages, options)
  @source, @target = source, target
  @pages, @options = pages, options
end

Public Instance Methods

content() click to toggle source
# File lib/gaspar.rb, line 89
# Reads the whole target file in binary mode and returns its bytes.
#
# File.binread opens, reads and closes the file in one call, so no file
# handle is left open for the garbage collector to reclaim, and the path is
# always treated as a plain file name (unlike Kernel#open).
#
# @return [String] the raw contents of @target
# @raise [SystemCallError] (e.g. Errno::ENOENT) when the file cannot be read
def content
  File.binread(@target)
end
extract() click to toggle source
# File lib/gaspar.rb, line 76
# Runs the external pdf-table-extract command with the options built by
# #process_options and waits for it to finish.
#
# NOTE: the option string is split on whitespace, so a source or target
# containing spaces is passed to the command as several arguments.
#
# @return [nil]
# @raise [IOError] when the executable is not available, or when the child
#   process does not terminate with exit status 0 (including being killed
#   by a signal)
def extract
  unless command_available?
    io_error 'Can\'t find pdf-table-extract executable in PATH'
  end

  args = [extract_command, *process_options.split(' ')]

  pid = Spoon.spawnp(*args)
  # wait2 hands back the child's status directly instead of relying on $?.
  _, status = Process.wait2(pid)
  # success? is nil (falsy) for a signal-killed child, where exitstatus
  # would be nil and `.zero?` would raise NoMethodError.
  io_error("Could not parse #{@source}") unless status.success?
end

Private Instance Methods

command_available?() click to toggle source
# File lib/gaspar.rb, line 107
# Reports whether #extract_command resolved to a command.
#
# Returns a strict boolean rather than the command string itself, as is
# conventional for predicate methods.
#
# @return [Boolean] true when #extract_command returns a non-nil value
def command_available?
  !extract_command.nil?
end
extract_command() click to toggle source
# File lib/gaspar.rb, line 111
# Name of the extraction executable, provided #which can locate it.
#
# @return [String, nil] 'pdf-table-extract' when #which returns a truthy
#   value for it, otherwise nil
def extract_command
  executable = 'pdf-table-extract'
  which(executable) ? executable : nil
end
io_error(error_message) click to toggle source
# File lib/gaspar.rb, line 126
# Aborts the current operation by raising an IOError.
#
# @param error_message message attached to the raised exception
# @raise [IOError] always
def io_error(error_message)
  raise(IOError, error_message)
end
process_options() click to toggle source
# File lib/gaspar.rb, line 95
# Builds the command-line option string for the extraction command.
#
# Flags appear in this order: `-i <source>` (if @source is set),
# `-o <target>` (if @target is set), one `-p <n>` per page numbered from 1
# to @pages, and `-t <format>` (if @options[:format] is set).
#
# @return [String] the flags joined by single spaces
def process_options
  input_flag = @source ? "-i #{@source}" : nil
  output_flag = @target ? "-o #{@target}" : nil
  page_flags = @pages.times.map { |index| "-p #{index + 1}" }
  format = @options[:format]
  format_flag = format ? "-t #{format}" : nil

  # compact drops the flags whose value was not supplied.
  [input_flag, output_flag, *page_flags, format_flag].compact.join(' ')
end
which(cmd) click to toggle source
# File lib/gaspar.rb, line 115
# Searches the directories listed in ENV['PATH'] for an executable file
# named +cmd+.
#
# When ENV['PATHEXT'] is set, each of its `;`-separated extensions is
# appended to +cmd+ in turn; otherwise the bare name is used.
#
# @param cmd [String] command name to look up
# @return [String, nil] path of the first match, or nil when there is none
#   (including when PATH is not set)
def which(cmd)
  exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
  # An unset PATH simply means there is nothing to search.
  ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).each do |path|
    exts.each do |ext|
      exe = File.join(path, "#{cmd}#{ext}")
      # Directories usually carry the execute bit too; only accept real files.
      return exe if File.file?(exe) && File.executable?(exe)
    end
  end
  nil
end