class UtteranceParser::Parser

Public Class Methods

new(save_path=nil) click to toggle source
# File lib/utterance_parser/parser.rb, line 6
# Builds a parser, restoring previously saved models when possible.
#
# save_path - optional directory holding saved models. When given, it is
#             resolved into the classifier and labeller file paths via
#             build_save_paths (which raises ArgumentError unless the
#             path is a directory).
#
# Each model is loaded from its file only when a save_path was given and
# that file exists; otherwise a fresh, untrained model is created.
def initialize(save_path=nil)
  build_save_paths(save_path) if save_path

  # Intent classifier: reload from disk or start empty.
  @classifier =
    if save_path && File.exist?(@classifier_file)
      NBayes::Base.from(@classifier_file)
    else
      NBayes::Base.new
    end

  # Entity labeller: reload from disk or start from the pattern file that
  # sits next to this source file.
  @labeller =
    if save_path && File.exist?(@labeller_file)
      Wapiti::Model.load(@labeller_file)
    else
      Wapiti::Model.new pattern: "#{__dir__}/pattern.txt"
    end
end

Public Instance Methods

parse(text) click to toggle source
# File lib/utterance_parser/parser.rb, line 43
# Parses a piece of text into an intent and its entities.
#
# text - the raw utterance to analyse.
#
# Returns a two-element Array: [intent, entities], where intent is the
# classifier's max_class for the utterance's POS tokens and entities is
# whatever extract_entities builds from the labeller's output.
def parse(text)
  utterance = Utterance.new(text)

  classification = @classifier.classify(utterance.pos_tokens)
  intent = classification.max_class

  # The labeller takes a batch of sequences; each token is passed as its
  # parts joined by a space. Only one sequence is sent, so only the first
  # result is used.
  token_lines = utterance.pos_tokens.map { |parts| parts.join(" ") }
  labeled_sequence = @labeller.label([token_lines]).first

  [intent, extract_entities(labeled_sequence)]
end
save(path=nil) click to toggle source
# File lib/utterance_parser/parser.rb, line 50
# Persists the classifier and the labeller to disk.
#
# path - optional save directory. When given, the target file paths are
#        (re)built from it via build_save_paths.
#
# Raises ArgumentError when no save paths have been set, i.e. neither this
# call nor an earlier one supplied a directory.
def save(path=nil)
  build_save_paths(path) if path

  unless defined?(@classifier_file) && defined?(@labeller_file)
    raise ArgumentError, "Path to save directory missing"
  end

  @classifier.dump(@classifier_file)

  # The labeller is compacted first, then written out.
  @labeller.tap(&:compact).save(@labeller_file)
end
train(examples) click to toggle source
# File lib/utterance_parser/parser.rb, line 22
# Trains both the intent classifier and the entity labeller.
#
# examples - either an Array of example objects (each responding to
#            pos_tokens, intent and labeled_tokens), or a Hash of
#            { utterance => intent } which is converted into Example
#            objects first.
#
# Raises ArgumentError for any other kind of input.
def train(examples)
  examples =
    case examples
    when Array
      examples
    when Hash
      examples.map { |utterance, intent| Example.new(utterance, intent) }
    else
      raise ArgumentError, "Expected [<Example>, ...] or { utterance => intent, ... }"
    end

  # Intent classification: one training call per example.
  examples.each { |sample| @classifier.train(sample.pos_tokens, sample.intent) }

  # Entity labelling: each token becomes "word tag entity", with "_"
  # standing in for tokens that carry no entity.
  labeled_examples = examples.map do |sample|
    sample.labeled_tokens.map do |word, tag, entity|
      [word, tag, entity || "_"].join(" ")
    end
  end

  @labeller.train labeled_examples
end

Private Instance Methods

build_save_paths(save_path) click to toggle source
# File lib/utterance_parser/parser.rb, line 81
# Derives the classifier and labeller file paths from a save directory
# and stores them in @classifier_file and @labeller_file.
#
# save_path - directory that holds (or will hold) the saved models.
#
# Raises ArgumentError when save_path is not an existing directory.
def build_save_paths(save_path)
  directory_ok = File.directory?(save_path)
  raise ArgumentError, "Path to save directory missing" unless directory_ok

  in_save_dir = ->(file_name) { File.join(save_path, file_name) }
  @classifier_file = in_save_dir.call("classifier.yml")
  @labeller_file = in_save_dir.call("labeller.mod")
end
extract_entities(tokens) click to toggle source

Extract entities from tokens.

E.g., given:

[["Play NNP", "_"], ["some DET", "_"], ["jazz NN", "category"]]

Returns:

{ category: "jazz" }
# File lib/utterance_parser/parser.rb, line 68
# Extracts entities from labelled tokens.
#
# tokens - Array of [tagged_word, label] pairs, where tagged_word is
#          "<word> <POS tag>" and label is either an entity name or "_"
#          for tokens that belong to no entity.
#
# E.g. [["Play NNP", "_"], ["some DET", "_"], ["jazz NN", "category"]]
# returns { category: "jazz" }.
#
# Returns a Hash mapping each label (as a Symbol) to the words carrying
# that label, joined by single spaces in input order.
def extract_entities(tokens)
  # FIXME: duplicated labels are not handled. If the same label is used
  # for two separate entities, their words are merged into one value.
  labeled = tokens.group_by { |_tagged_word, label| label }
  labeled.delete("_")

  labeled.each_with_object({}) do |(label, words), entities|
    entities[label.to_sym] = words.map do |tagged_word, _label|
      # Remove the trailing POS tag, e.g. "jazz NN" -> "jazz". A token
      # with no space has no tag to strip, so keep it whole instead of
      # passing nil as a length (which raises TypeError).
      tag_separator = tagged_word.rindex(" ")
      tag_separator ? tagged_word[0, tag_separator] : tagged_word
    end.join(" ")
  end
end