class Splitta::Model
Constants
- LABELS
Attributes
feats[R]
lower_words[R]
non_abbrs[R]
prior_probs[R]
Public Class Methods
new()
click to toggle source
# File lib/splitta/model.rb, line 17
# Builds a model by reading its three data sets through +model_read+ and
# deriving one prior probability per label.
#
# Each prior is the weight stored in +feats+ under the key
# <tt>[label, '<prior>']</tt>, raised to the fourth power.
# NOTE(review): the reason for the exponent 4 is not visible here - confirm
# against the model's training code before changing it.
def initialize
  @feats = model_read(:feats)
  @lower_words = model_read(:lower_words)
  @non_abbrs = model_read(:non_abbrs)
  @prior_probs = LABELS.map { |label| [label, feats[[label, '<prior>']]**4] }.to_h
end
Public Instance Methods
classify(doc)
click to toggle source
# File lib/splitta/model.rb, line 27
# Stores a prediction on every fragment of +doc+.
#
# @param doc [#frags] object whose +frags+ yields fragments responding to +pred=+
# @return whatever +doc.frags.each+ returns (the fragment collection itself
#   when +frags+ is an Array)
def classify(doc)
  fragments = doc.frags
  fragments.each { |fragment| fragment.pred = classify_one(fragment) }
end
inspect()
click to toggle source
# File lib/splitta/model.rb, line 33
# Compact representation showing only the class name and the object id,
# rather than dumping the model's instance variables.
#
# @return [String]
def inspect
  format('#<Splitta::Model:%d>', object_id)
end
Private Instance Methods
basedir()
click to toggle source
# File lib/splitta/model.rb, line 65
# Absolute path of the +data+ directory located two levels above the
# directory containing this source file.
#
# @return [String]
def basedir
  relative = File.join('..', '..', 'data')
  File.expand_path(relative, __dir__)
end
classify_one(frag)
click to toggle source
# File lib/splitta/model.rb, line 39
# Scores a single fragment and returns the normalized probability of the
# last label in +LABELS+.
#
# Starting from a copy of the prior probabilities, each label's score is
# multiplied by the weight of every fragment feature that has an entry in
# +feats+ under <tt>[label, feature_name]</tt>; features without an entry
# are skipped.
#
# @param frag [#features] fragment whose +features(model)+ yields arrays that
#   are joined with '_' to form feature names
# @return the value +normalize+ produces for +LABELS.last+
# @raise [KeyError] if the normalized scores have no entry for +LABELS.last+
def classify_one(frag)
  # Feature names do not depend on the label, so extract and join them once
  # instead of once per label.
  feature_names = frag.features(self).map { |f| f.join('_') }

  probs = prior_probs.dup
  LABELS.each do |label|
    feature_names.each do |feature_name|
      key = [label, feature_name]
      probs[label] *= feats[key] if feats.key?(key)
    end
  end

  normalize(probs).fetch(LABELS.last)
end
model_read(name)
click to toggle source
# File lib/splitta/model.rb, line 59
# Reads one gzip-compressed, Marshal-serialized data set from +basedir+.
#
# NOTE(review): Marshal.load can instantiate arbitrary objects, so it must
# only ever see trusted files. Presumably these are data files shipped with
# the library - confirm before pointing +basedir+ anywhere else.
#
# @param name [#to_s] file name inside +basedir+
# @return the object stored in the file
def model_read(name)
  path = File.join(basedir, name.to_s)
  Zlib::GzipReader.open(path) { |gz| Marshal.load(gz) }
end
normalize(probs)
click to toggle source
# File lib/splitta/model.rb, line 52
# Rescales the values of +probs+ by dividing each one by the sum of all
# values (converted to Float).
#
# @param probs [Hash] label => numeric score
# @return [Hash] new hash with the same keys; +probs+ itself is not modified
def normalize(probs)
  # Plain left-to-right :+ fold; an empty hash folds to nil, and nil.to_f is 0.0.
  denominator = probs.each_value.inject(:+).to_f
  probs.transform_values { |score| score / denominator }
end