class Splitta::Model

Constants

LABELS

Attributes

feats[R]
lower_words[R]
non_abbrs[R]
prior_probs[R]

Public Class Methods

new() click to toggle source
# File lib/splitta/model.rb, line 17
# Loads the three model tables (feats, lower_words, non_abbrs) through
# model_read and derives the per-label prior probabilities.
#
# For every label in LABELS the prior is the value stored in feats under
# the key [label, '<prior>'], raised to the fourth power.
def initialize
  @feats, @lower_words, @non_abbrs =
    %i[feats lower_words non_abbrs].map { |table| model_read(table) }

  prior_pairs = LABELS.map { |label| [label, feats[[label, '<prior>']]**4] }
  @prior_probs = prior_pairs.to_h
end

Public Instance Methods

classify(doc) click to toggle source
# File lib/splitta/model.rb, line 27
# Assigns a prediction to every fragment of +doc+.
#
# doc - object responding to frags; each fragment must accept pred=.
#
# Each fragment's pred is set to the result of classify_one for that
# fragment. Returns the collection obtained from doc.frags.
def classify(doc)
  fragments = doc.frags
  fragments.each { |fragment| fragment.pred = classify_one(fragment) }
end
inspect() click to toggle source
# File lib/splitta/model.rb, line 33
def inspect
  "#<Splitta::Model:#{object_id}>"
end

Private Instance Methods

basedir() click to toggle source
# File lib/splitta/model.rb, line 65
# Absolute path of the directory the model files are read from: the
# 'data' directory two levels above the directory containing this file.
def basedir
  relative_data_dir = '../../data'
  File.expand_path(relative_data_dir, __dir__)
end
classify_one(frag) click to toggle source
# File lib/splitta/model.rb, line 39
# Scores a single fragment and returns its normalized probability for
# LABELS.last.
#
# Starts from a copy of prior_probs and, for every label, multiplies in the
# feats value of each fragment feature that has an entry under
# [label, feature_name]. Features without an entry for a label are skipped.
#
# frag - object responding to features(model); every feature must respond
#        to join (its parts are joined with '_' to form the lookup name).
#
# Raises KeyError (from fetch) if the normalized result has no entry for
# LABELS.last.
def classify_one(frag)
  # The feature names do not depend on the label, so ask the fragment for
  # its features and build the joined names once, not once per label.
  names = frag.features(self).map { |f| f.join('_') }

  probs = prior_probs.dup
  LABELS.each do |label|
    names.each do |name|
      key = [label, name]
      next unless feats.include?(key)

      probs[label] *= feats[key]
    end
  end
  normalize(probs).fetch(LABELS.last)
end
model_read(name) click to toggle source
# File lib/splitta/model.rb, line 59
# Reads one model table from disk.
#
# name - file name (anything responding to to_s) inside basedir.
#
# The file is opened as gzip and its contents are deserialized with
# Marshal. Returns whatever object was marshalled into the file.
# NOTE(review): Marshal.load must only see trusted files; this presumably
# reads only files shipped in the library's data directory - confirm.
def model_read(name)
  path = File.join(basedir, name.to_s)
  Zlib::GzipReader.open(path) { |gz| Marshal.load(gz) }
end
normalize(probs) click to toggle source
# File lib/splitta/model.rb, line 52
# Rescales the values of +probs+ so that they are expressed as fractions of
# their total.
#
# probs - Hash mapping keys to numeric values.
#
# Returns a new Hash with the same keys; each value is the original value
# divided by the float sum of all values. +probs+ itself is not modified.
def normalize(probs)
  # Plain left-to-right addition, converted to Float so the division below
  # is never integer division.
  denominator = probs.each_value.inject { |acc, weight| acc + weight }.to_f
  probs.transform_values { |weight| weight / denominator }
end