class Gaspar::Extractor
Extracts data from all pages of a PDF.
Public Class Methods
new(source, target, pages, options)
click to toggle source
# File lib/gaspar.rb, line 69
#
# Stores the extraction settings on the instance; no work is done here.
#
# @param source  input path, emitted as the "-i" flag by process_options
# @param target  output path, emitted as the "-o" flag and read back by content
# @param pages   page count; process_options calls #times on it
# @param options hash-like settings; process_options reads its :format key
def initialize(source, target, pages, options)
  @source, @target, @pages, @options = source, target, pages, options
end
Public Instance Methods
content()
click to toggle source
# File lib/gaspar.rb, line 89
#
# Returns the full contents of the target file, read in binary mode.
#
# File.binread opens, reads and closes the file in one call, so no file
# handle is left open. It also never interprets the path as a command,
# unlike Kernel#open, which treats a leading "|" as a pipe to a subprocess.
#
# @return [String] raw bytes of the file at @target
# @raise [SystemCallError] if the file cannot be opened or read
def content
  File.binread(@target)
end
extract()
click to toggle source
# File lib/gaspar.rb, line 76
#
# Runs the external pdf-table-extract command with the flags produced by
# process_options and blocks until the child process finishes.
#
# @return [nil]
# @raise [IOError] if the executable cannot be located, or if the child does
#   not finish with exit status 0 (including being terminated by a signal)
def extract
  io_error "Can't find pdf-table-extract executable in PATH" unless command_available?

  # NOTE(review): the flag string is re-split on whitespace here, so a source
  # or target path that contains a space is broken into several arguments.
  args = [extract_command, *process_options.split(' ')]
  pid = Spoon.spawnp(*args)

  # waitpid2 returns the Process::Status directly rather than via the $? global.
  # success? is nil (falsy) when the child was killed by a signal; exitstatus
  # is nil in that case, so calling zero? on it would raise NoMethodError.
  _, status = Process.waitpid2(pid)
  io_error("Could not parse #{@source}") unless status.success?
end
Private Instance Methods
command_available?()
click to toggle source
# File lib/gaspar.rb, line 107
#
# Tells whether the extraction executable can be resolved.
#
# Returns a strict boolean instead of passing through the command string
# that extract_command returns.
#
# @return [Boolean] true when extract_command yields a command name
def command_available?
  !extract_command.nil?
end
extract_command()
click to toggle source
# File lib/gaspar.rb, line 111
#
# Name of the extraction executable, provided `which` can locate it.
#
# @return [String, nil] 'pdf-table-extract' when found, otherwise nil
def extract_command
  executable = 'pdf-table-extract'
  return unless which(executable)

  executable
end
io_error(error_message)
click to toggle source
# File lib/gaspar.rb, line 126
#
# Raises an IOError carrying the given message.
#
# @param error_message [String] text for the exception
# @raise [IOError] always
def io_error(error_message)
  raise IOError.new(error_message)
end
process_options()
click to toggle source
# File lib/gaspar.rb, line 95
#
# Builds the command-line flag string from the instance settings.
#
# Emits "-i <source>" and "-o <target>" when those are set, one "-p <n>" per
# page numbered from 1 up to @pages, and "-t <format>" when a :format option
# is present. A nil @options is treated the same as having no :format, to
# match the nil guards already applied to @source and @target.
#
# @return [String] flags joined by single spaces ('' when nothing applies)
def process_options
  opts = []
  opts << "-i #{@source}" if @source
  opts << "-o #{@target}" if @target
  # Page flags are 1-based, while #times yields 0-based indexes.
  opts.concat(@pages.times.map { |index| "-p #{index + 1}" })
  format = @options && @options[:format]
  opts << "-t #{format}" if format
  opts.join(' ')
end
which(cmd)
click to toggle source
# File lib/gaspar.rb, line 115
#
# Searches the directories listed in ENV['PATH'] for an executable file named
# +cmd+, trying each suffix from ENV['PATHEXT'] (split on ";") when that
# variable is set, or no suffix otherwise.
#
# @param cmd [String] bare command name to look for
# @return [String, nil] path of the first match in PATH order, or nil when
#   nothing matches or PATH is unset
def which(cmd)
  exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
  # An unset PATH means there is nothing to search, not an error.
  dirs = ENV.fetch('PATH', '').split(File::PATH_SEPARATOR)

  candidates = dirs.product(exts).map { |dir, ext| File.join(dir, "#{cmd}#{ext}") }
  # File.executable? is also true for searchable directories, so require a
  # regular file to avoid returning a directory that shares the command name.
  candidates.find { |path| File.file?(path) && File.executable?(path) }
end