class TikaMasala::Parser

Attributes

jar_path[R]

Public Class Methods

new(jar_path = nil) click to toggle source
# File lib/tika-masala/parser.rb, line 10
# Builds a parser bound to a Tika application jar.
#
# jar_path - path to the jar to run. When nil, defaults to
#            dependencies/tika-app-1.5.jar resolved relative to this file.
#
# Raises FileNotFound when nothing exists at the resolved path.
def initialize(jar_path = nil)
  if jar_path.nil?
    bundled = File.join('..', '..', '..', 'dependencies', 'tika-app-1.5.jar')
    jar_path = File.expand_path(bundled, __FILE__)
  end

  # File.exists? was deprecated and removed in Ruby 3.2; File.exist? is the
  # spelling that works on every Ruby version.
  raise FileNotFound, "Jar #{jar_path} does not exist" unless File.exist?(jar_path)

  @jar_path = jar_path
end

Public Instance Methods

detect_type(file) click to toggle source
# File lib/tika-masala/parser.rb, line 28
# Asks the Tika jar to detect the type of +file+ by invoking it with the
# `--detect` switch; the result of java_exec is returned unchanged.
def detect_type(file)
  switch = '--detect'
  java_exec(switch, file)
end
java_exec(*args) click to toggle source
# File lib/tika-masala/parser.rb, line 32
# Runs `java -jar <jar_path> <args...>` and returns what the process wrote to
# standard output.
#
# args - command-line arguments placed after the jar path; each one is
#        converted with to_s and passed as a separate argument.
#
# Returns the captured stdout String when the process exits successfully.
# Raises TikaError, built with :stdout, :stderr and :exitstatus (the
# Process::Status of the finished process), when it does not.
def java_exec(*args)
  # capture3 drains stdout and stderr while the child is still running.
  # Waiting for exit before reading would deadlock as soon as the output
  # exceeds the OS pipe buffer. Passing an argument list instead of a single
  # command string means no shell is involved, so no quoting is needed.
  stdout, stderr, status = Open3.capture3('java', '-jar', @jar_path, *args.map(&:to_s))

  return stdout if status.success?

  raise TikaError.new(stdout: stdout, stderr: stderr, exitstatus: status)
end
metadata(file) click to toggle source
# File lib/tika-masala/parser.rb, line 24
# Invokes the Tika jar with the `--metadata` switch for +file+ and returns
# java_exec's result as-is.
def metadata(file)
  command = ['--metadata', file]
  java_exec(*command)
end
parse(file) click to toggle source
# File lib/tika-masala/parser.rb, line 20
# Invokes the Tika jar with the `--text` switch for +file+ and returns
# java_exec's result as-is.
def parse(file)
  mode = '--text'
  java_exec(mode, file)
end