class Opener::Ners::Base
Base
NER class that supports various languages such as Dutch and English.
Constants
- MODELS_PATH
The default models directory.
- VERSION
Attributes
enable_time[R]
@return [TrueClass|FalseClass]
models[R]
@return [String]
Public Class Methods
new(options = {})
click to toggle source
@param [Hash] options
@option options [TrueClass|FalseClass] :enable_time Whether or not to
enable dynamic timestamps (enabled by default).
# File lib/opener/ners/base.rb, line 30
#
# Sets up the models directory and the timestamp flag.
#
# The models directory is taken from the NER_BASE_MODELS_PATH environment
# variable when it is set; otherwise MODELS_PATH is used.
#
# @param [Hash] options
# @option options [TrueClass|FalseClass] :enable_time Value stored in
#  `enable_time`; `true` is used when the key is absent.
def initialize(options = {})
  @enable_time = options.fetch(:enable_time) { true }
  @models      = ENV.fetch('NER_BASE_MODELS_PATH') { MODELS_PATH }
end
Public Instance Methods
language_from_kaf(input)
click to toggle source
Returns the language for the given KAF document.
@param [String] input @return [String]
# File lib/opener/ners/base.rb, line 73
#
# Extracts the `xml:lang` attribute of the first `KAF` element found while
# pull-parsing the document; parsing stops as soon as that element is seen.
#
# @param [String] input The KAF document.
# @return [String] The language code.
# @raise [Core::UnsupportedLanguageError] When no language was found or it
#  contains anything other than ASCII letters, hyphens and underscores.
def language_from_kaf(input)
  lang = nil

  Oga::XML::PullParser.new(input).parse do |node|
    next unless node.is_a?(Oga::XML::Element) && node.name == 'KAF'

    lang = node.get('xml:lang')
    break
  end

  # The language ends up in a file path (see #run), so reject anything that
  # could be used for path traversal such as "../../foo".
  return lang if lang =~ /\A[a-zA-Z\-_]+\z/

  raise Core::UnsupportedLanguageError, lang
end
new_kaf_document(input)
click to toggle source
@param [String] input The input KAF document as a string. @return [Java::ixa.kaflib.KAFDocument]
# File lib/opener/ners/base.rb, line 60
#
# Wraps the input string in a Java reader and hands it to kaflib to build a
# KAF document object.
#
# @param [String] input The input KAF document as a string.
# @return [Java::ixa.kaflib.KAFDocument]
def new_kaf_document(input)
  stream = StringIO.new(input).to_inputstream

  Java::ixa.kaflib.KAFDocument.create_from_stream(
    Java::java.io.InputStreamReader.new(stream)
  )
end
run(input)
click to toggle source
Runs the NER annotator on the given KAF document and returns the annotator's output.
@param [String] input The input to process. @return [Array]
# File lib/opener/ners/base.rb, line 43
#
# Annotates a KAF document using the model that matches its language.
#
# The model is expected at `<models>/<language>.bin`.
#
# @param [String] input The KAF document to process.
# @return The value returned by the annotator's `annotate_kaf` call.
# @raise [Core::UnsupportedLanguageError] When the document's language is
#  invalid or no model file exists for it.
def run(input)
  language   = language_from_kaf(input)
  model_path = File.join(models, "#{language}.bin")

  unless File.file?(model_path)
    raise Core::UnsupportedLanguageError, language
  end

  # Parse the document before constructing the annotator so that failures
  # surface in the same order as before.
  document  = new_kaf_document(input)
  settings  = build_properties(language, model_path)
  annotator = Java::eus.ixa.ixa.pipe.nerc.Annotate.new(settings)

  annotator.annotate_kaf(enable_time, document)
end
Private Instance Methods
build_properties(language, model)
click to toggle source
@param [String] language @param [String] model
# File lib/opener/ners/base.rb, line 96
#
# Builds the Java properties object passed to the annotator. Only the
# language and model vary; the remaining settings are fixed.
#
# @param [String] language
# @param [String] model Path to the model file.
# @return [Java::java.util.Properties]
def build_properties(language, model)
  settings = {
    'language'        => language,
    'model'           => model,
    'ruleBasedOption' => 'off',
    'dictTag'         => 'off',
    'dictPath'        => 'off',
    'clearFeatures'   => 'no'
  }

  settings.each_with_object(Java::java.util.Properties.new) do |(key, value), props|
    props.set_property(key, value)
  end
end