class Tikarb

Constants

VERSION

Attributes

path[RW]

Public Class Methods

cli(*args) click to toggle source
# File lib/tikarb.rb, line 45
# Runs the Tika command-line interface with the given arguments.
#
# Loads the Java bridge, then passes each argument, in order, to a single
# Java::TikaCLI instance. The calls run inside trap_stdout, and this method
# returns whatever trap_stdout returns.
def cli(*args)
  Java.load

  trap_stdout do
    tika_cli = Java::TikaCLI.new
    args.each do |argument|
      tika_cli.process(argument)
    end
  end
end
detect(file) click to toggle source
# File lib/tikarb.rb, line 29
# Detects the media type of +file+.
#
# +file+ is either an object responding to +read+ or a value accepted by
# Java::FileInputStream.new (see file_to_input_stream). Returns the result of
# Tika#detect for the stream built from it.
#
# Tika#detect does not close the stream it is handed, so the stream is closed
# here (also when detection raises) to avoid leaking a file handle.
def detect(file)
  Java.load

  stream = file_to_input_stream(file)
  begin
    Java::Tika.new.detect(stream)
  ensure
    stream.close
  end
end
parse(file) click to toggle source
# File lib/tikarb.rb, line 36
# Extracts the text and metadata of +file+.
#
# Returns a two-element array: the value returned by Tika#parseToString for
# the stream built from +file+, and the Hash that metadata_to_hash builds
# from the Java::Metadata object passed to that call.
def parse(file)
  Java.load

  extractor = Java::Tika.new
  meta = Java::Metadata.new
  input = file_to_input_stream(file)
  content = extractor.parseToString(input, meta)
  [content, metadata_to_hash(meta)]
end

Private Class Methods

file_to_input_stream(file) click to toggle source
# File lib/tikarb.rb, line 56
# Wraps +file+ in a Java input stream.
#
# An object responding to +read+ has the result of its +read+ call passed to
# Java::ByteArrayInputStream.new; anything else is passed unchanged to
# Java::FileInputStream.new.
def file_to_input_stream(file)
  return Java::FileInputStream.new(file) unless file.respond_to?(:read)

  Java::ByteArrayInputStream.new(file.read)
end
metadata_to_hash(metadata) click to toggle source
# File lib/tikarb.rb, line 71
# Converts a metadata object into a plain Ruby Hash.
#
# For every entry yielded by +metadata.names+ the hash maps that name to
# +metadata.get(name)+. Returns an empty Hash when there are no names.
def metadata_to_hash(metadata)
  result = {}
  metadata.names.each do |key|
    result[key] = metadata.get(key)
  end
  result
end
trap_stdout() { || ... } click to toggle source
# File lib/tikarb.rb, line 64
# Captures what is written to Java's System.out while the block runs.
#
# Points System.out at an in-memory Java::ByteArrayOutputStream, yields, and
# returns that buffer's +toString+ result.
#
# The stream that was installed before the call is put back afterwards, also
# when the block raises; otherwise all later JVM output would keep going to a
# buffer nobody reads.
def trap_stdout
  previous = Java::System.out
  buffer = Java::ByteArrayOutputStream.new
  Java::System.setOut(Java::PrintStream.new(buffer))

  begin
    yield
    buffer.toString
  ensure
    Java::System.setOut(previous)
  end
end