class Opener::Ners::Base

Base NER class that supports various languages such as Dutch and English.

Constants

MODELS_PATH

The default models directory.

VERSION

Attributes

enable_time[R]

@return [TrueClass|FalseClass]

models[R]

@return [String]

Public Class Methods

new(options = {}) click to toggle source

@param [Hash] options

@option options [TrueClass|FalseClass] :enable_time Whether or not to enable dynamic timestamps (enabled by default).
# File lib/opener/ners/base.rb, line 30
# Sets up a new instance.
#
# The models directory is taken from the NER_BASE_MODELS_PATH environment
# variable; MODELS_PATH is only used when that variable is not set.
#
# @param [Hash] options
# @option options [TrueClass|FalseClass] :enable_time Value stored in
#  @enable_time; true is used when the key is absent.
def initialize(options = {})
  # The block form keeps the fallback lazy: MODELS_PATH is only looked up
  # when the environment variable is missing.
  @models      = ENV.fetch('NER_BASE_MODELS_PATH') { MODELS_PATH }
  @enable_time = options.fetch(:enable_time, true)
end

Public Instance Methods

language_from_kaf(input) click to toggle source

Returns the language for the given KAF document.

@param [String] input @return [String]

# File lib/opener/ners/base.rb, line 73
# Extracts the value of the "xml:lang" attribute of the first KAF element
# found in the given document.
#
# @param [String] input The KAF document.
# @return [String] The language, consisting only of letters, dashes and
#  underscores.
# @raise [Core::UnsupportedLanguageError] When no language was found or when
#  it contains any other character.
def language_from_kaf(input)
  lang = nil

  Oga::XML::PullParser.new(input).parse do |element|
    next unless element.is_a?(Oga::XML::Element) && element.name == 'KAF'

    lang = element.get('xml:lang')

    # Only the first KAF element is of interest, stop parsing right away.
    break
  end

  # Guard against path-like values such as "../../foo" being used as a
  # language. A missing language (nil) never matches and is rejected too.
  return lang if lang =~ /\A[a-zA-Z\-_]+\z/

  raise Core::UnsupportedLanguageError, lang
end
new_kaf_document(input) click to toggle source

@param [String] input The input KAF document as a string. @return [Java::ixa.kaflib.KAFDocument]

# File lib/opener/ners/base.rb, line 60
# Builds a KAF document object from the given KAF string by wrapping the
# string in a Java input stream reader and handing that to kaflib.
#
# @param [String] input The input KAF document as a string.
# @return [Java::ixa.kaflib.KAFDocument]
def new_kaf_document(input)
  stream = StringIO.new(input).to_inputstream

  # NOTE(review): no charset is passed to the reader, so presumably the
  # JVM's default charset is used for decoding - confirm this is intended.
  kaf_reader = Java::java.io.InputStreamReader.new(stream)

  Java::ixa.kaflib.KAFDocument.create_from_stream(kaf_reader)
end
run(input) click to toggle source

Runs the named entity recognizer on the given KAF document and returns the output of the annotator.

@param [String] input The input to process. @return [Array]

# File lib/opener/ners/base.rb, line 43
# Annotates the given KAF document using the model that belongs to the
# document's language.
#
# The model is expected at "<models>/<language>.bin".
#
# @param [String] input The KAF document to process.
# @return The value returned by the annotator's annotate_kaf method
#  (presumably the annotated KAF document - confirm against the annotator).
# @raise [Core::UnsupportedLanguageError] When there is no model file for
#  the document's language.
def run(input)
  language   = language_from_kaf(input)
  model_file = File.join(models, "#{language}.bin")

  unless File.file?(model_file)
    raise Core::UnsupportedLanguageError, language
  end

  document = new_kaf_document(input)
  settings = build_properties(language, model_file)

  Java::eus.ixa.ixa.pipe.nerc.Annotate
    .new(settings)
    .annotate_kaf(enable_time, document)
end

Private Instance Methods

build_properties(language, model) click to toggle source

@param [String] language @param [String] model

# File lib/opener/ners/base.rb, line 96
# Creates the java.util.Properties object that is passed to the annotator.
#
# Besides the language and model the remaining properties are always set to
# the same fixed values.
#
# @param [String] language Value of the "language" property.
# @param [String] model Value of the "model" property.
# @return [Java::java.util.Properties]
def build_properties(language, model)
  properties = Java::java.util.Properties.new

  # Hashes preserve insertion order, so the properties are set in the order
  # in which they are listed here.
  {
    'language'        => language,
    'model'           => model,
    'ruleBasedOption' => 'off',
    'dictTag'         => 'off',
    'dictPath'        => 'off',
    'clearFeatures'   => 'no'
  }.each { |name, value| properties.set_property(name, value) }

  properties
end