class Docsplit::CommandLine
A single command-line utility to separate a PDF into all its component parts.
Constants
- BANNER
Public Class Methods
new()
click to toggle source
Creating a CommandLine parses its options and command from the
contents of ARGV, then runs the command immediately.
# File lib/docsplit/command_line.rb, line 33
# Build the command line from ARGV and execute it right away: parse the
# switches, take the next ARGV entry as the command name (stored as a
# symbol, or nil when ARGV is empty), then hand off to `run`.
def initialize
  parse_options
  name = ARGV.shift
  @command = name ? name.to_sym : nil
  run
end
Public Instance Methods
run()
click to toggle source
Delegate to the Docsplit
Ruby API to perform all extractions.
# File lib/docsplit/command_line.rb, line 41
# Dispatch @command to the matching Docsplit extraction call, passing the
# remaining ARGV entries and the parsed @options.
#
# Commands listed in METADATA_KEYS are forwarded to the corresponding
# `extract_<command>` method and a non-nil result is printed. Any other
# command prints the usage message. An ExtractionFailed error prints its
# message and exits with status 1.
def run
  case @command
  when :images
    Docsplit.extract_images(ARGV, @options)
  when :pages
    Docsplit.extract_pages(ARGV, @options)
  when :text
    Docsplit.extract_text(ARGV, @options)
  when :pdf
    Docsplit.extract_pdf(ARGV, @options)
  else
    if METADATA_KEYS.include?(@command)
      result = Docsplit.send("extract_#{@command}", ARGV, @options)
      puts result unless result.nil?
    else
      usage
    end
  end
rescue ExtractionFailed => failure
  puts failure.message.chomp
  exit(1)
end
usage()
click to toggle source
Print out the usage help message.
# File lib/docsplit/command_line.rb, line 63
# Print the option parser's help text, padded with a blank line on each
# side, then terminate the process.
def usage
  help_text = "\n#{@option_parser}\n"
  puts help_text
  exit
end
Private Instance Methods
parse_options()
click to toggle source
Use the OptionParser library to parse all supported options out of ARGV. The options, formatted for the Ruby API, are stored in @options.
# File lib/docsplit/command_line.rb, line 73
# Parse the supported switches out of ARGV with OptionParser.
#
# Fills @options (starting from :ocr => :default, :clean => true) with the
# values handed to the Docsplit Ruby API, and keeps the parser in
# @option_parser so `usage` can print it. Recognized switches are removed
# from ARGV; everything else is left in place.
#
# Any OptionParser::ParseError (unknown switch, needless argument,
# ambiguous switch, ...) prints the error message and exits with status 1.
def parse_options
  @options = {:ocr => :default, :clean => true}
  @option_parser = OptionParser.new do |opts|
    opts.on('-o', '--output [DIR]', 'set the directory for all output') do |dir|
      @options[:output] = dir
    end
    opts.on('-p', '--pages [PAGES]', "extract specific pages (eg: 5-10)") do |pages|
      @options[:pages] = pages
    end
    # The argument is optional, so the block receives nil for a bare `-s`;
    # only split when a value was actually supplied.
    opts.on('-s', '--size [SIZE]', 'set a fixed size (eg: 50x75)') do |size|
      @options[:size] = size.split(',') if size
    end
    # Same nil guard as --size: a bare `-f` must not raise.
    opts.on('-f', '--format [FORMAT]', 'set image format (pdf, jpg, gif...)') do |format|
      @options[:format] = format.split(',') if format
    end
    opts.on('-d', '--density [NUM]', 'set image density (DPI) when rasterizing') do |density|
      @options[:density] = density
    end
    opts.on('--[no-]ocr', 'force OCR to be used, or disable OCR') do |ocr|
      @options[:ocr] = ocr
    end
    opts.on('--no-clean', 'disable cleaning of OCR\'d text') do
      @options[:clean] = false
    end
    opts.on('-l', '--language [LANGUAGE]', 'set the language (ISO 639-2/T code) for text extraction') do |lang|
      @options[:language] = lang
    end
    opts.on('--no-orientation-detection', 'turn off automatic orientation detection in tesseract') do
      @options[:detect_orientation] = false
    end
    opts.on('-r', '--rolling', 'generate images from each previous image') do
      @options[:rolling] = true
    end
    opts.on_tail('-v', '--version', 'display docsplit version') do
      puts "Docsplit version #{Docsplit::VERSION}"
      exit
    end
    opts.on_tail('-h', '--help', 'display this help message') do
      usage
    end
  end
  @option_parser.banner = BANNER
  begin
    @option_parser.parse!(ARGV)
  rescue OptionParser::ParseError => e
    # ParseError is the common parent of InvalidOption, NeedlessArgument,
    # AmbiguousOption, etc.; report them all the same way instead of
    # letting the non-InvalidOption ones escape as a stack trace.
    puts e.message
    exit(1)
  end
end