class UncleKryon::Trainer

Attributes

max_tag_id_length[RW]
max_tag_length[RW]
tags[RW]
trainer[RW]

Public Class Methods

new(tags={}) click to toggle source
# File lib/unclekryon/trainer.rb, line 36
# Creates a trainer backed by a fresh NBayes classifier.
#
# @param tags [Hash] tag ID => tag name pairs offered as choices in #train
#   (IDs and names must respond to +length+; see #init_lengths)
def initialize(tags={})
  @tags = tags
  @trainer = NBayes::Base.new
  @max_tag_id_length = @max_tag_length = 0

  # Derive the real column widths from the tag table.
  init_lengths
end
to_tokens(text) click to toggle source
# File lib/unclekryon/trainer.rb, line 25
# Breaks text into tokens: splits on runs of whitespace, removes every
# punctuation and control character from each piece, and discards pieces
# that end up empty.
#
# @param text [String] raw text
# @return [Array<String>] cleaned, non-empty tokens in their original order
def self.to_tokens(text)
  text.split(/[[:space:]]+/)
      .map { |piece| piece.gsub(/[[:punct:][:cntrl:]]+/, '') }
      .reject(&:empty?)
end

Public Instance Methods

init_lengths() click to toggle source
# File lib/unclekryon/trainer.rb, line 45
# Recomputes the column widths used when printing the tag table.
#
# Sets @max_tag_id_length to the longest tag ID plus two, with a floor of 7,
# and @max_tag_length to the negated length of the longest tag name.
def init_lengths
  widest_id = @tags.keys.map(&:length).max || 0
  widest_tag = @tags.values.map(&:length).max || 0

  # Two extra columns of indentation, but never narrower than the
  # 7-character "<Enter>" option.
  @max_tag_id_length = [widest_id + 2, 7].max

  # Negative so the value left-justifies when used as a format width.
  @max_tag_length = -widest_tag
end
tag(text) click to toggle source
# File lib/unclekryon/trainer.rb, line 99
# Classifies text with the trainer and reports the winning class.
#
# @param text [String] text to tokenize and classify
# @return the +max_class+ of the classification result
def tag(text)
  tokens = self.class.to_tokens(text)
  @trainer.classify(tokens).max_class
end
to_s() click to toggle source
# File lib/unclekryon/trainer.rb, line 103
# Renders the trainer as text: its YAML dump, a newline, and then the
# value of the trainer data's +category_stats+.
#
# @return [String]
def to_s
  @trainer.to_yaml + "\n" + @trainer.data.category_stats
end
train(text) click to toggle source
# File lib/unclekryon/trainer.rb, line 59
# Interactively trains the classifier on one piece of text.
#
# Prints the tag table, the current guess and the text, then reads a tag ID.
# A blank answer accepts the guess; any other answer must be a key of @tags.
# With the -t/--test option a random tag ID is picked instead of reading
# from standard input.
#
# @param text [String] text to tokenize and train on
# @return the tag the text was trained as
# @raise [EOFError] if standard input ends before a tag ID could be read
# @raise [RuntimeError] if the accepted guess is not one of the tags, or the
#   entered ID is not one of the tag IDs
def train(text)
  guess_tag = tag(text) # Try and guess
  tokens = self.class.to_tokens(text)

  puts '#################'
  puts '# Training Tags #'
  puts '#################'

  # Builds a row format such as "%7s = %-5s" from the precomputed widths.
  tf = '%%%is = %%%is' % [@max_tag_id_length,@max_tag_length]
  @tags.each do |id,name|
    puts tf % [id,name]
  end
  puts "<Enter> = Guess: #{guess_tag}"

  puts '-----------------'
  puts text
  puts '-----------------'
  print 'What is it? '

  # Use -t/--test option
  if DevOpts.instance.test?
    puts(tag_id = @tags.keys.sample) # For testing purposes
  else
    line = $stdin.gets # $stdin because app accepts args

    # gets returns nil at end of input; fail with a clear message instead
    # of a NoMethodError on nil.
    raise EOFError, 'No tag ID entered (end of input)' if line.nil?

    tag_id = line.strip
  end
  puts

  if tag_id.empty?
    raise "Invalid guess tag[#{guess_tag}]" if !@tags.value?(guess_tag)
    chosen_tag = guess_tag
  else
    raise "Invalid tag ID[#{tag_id}]" if !@tags.include?(tag_id)
    chosen_tag = @tags[tag_id]
  end

  @trainer.train(tokens,chosen_tag)

  return chosen_tag
end