class OmniCat::Classifiers::Strategy

Author

Mustafa Turan (mustafaturan.net@gmail.com)

Copyright

Copyright © 2013 Mustafa Turan

License

MIT

The class supplies abstract methods for possible text classifiers

Attributes

categories[RW]
category_count[RW]
category_size_limit[RW]
doc_count[RW]
token_count[RW]
unique_token_count[RW]

Public Class Methods

new(strategy_hash = {}) click to toggle source
# File lib/omnicat/classifiers/strategy.rb, line 19
# Sets up an empty category table and the numeric counters.
#
# strategy_hash - optional Hash; the keys read are :category_count,
#                 :category_size_limit, :doc_count, :token_count and
#                 :unique_token_count. Every value goes through #to_i, so a
#                 missing key (nil) becomes 0.
def initialize(strategy_hash = {})
  @categories = {}
  counter_keys = [
    :category_count,
    :category_size_limit,
    :doc_count,
    :token_count,
    :unique_token_count
  ]
  # Each counter is stored in the instance variable named after its key.
  counter_keys.each do |counter|
    instance_variable_set(:"@#{counter}", strategy_hash[counter].to_i)
  end
end

Public Instance Methods

add_categories(category_names) click to toggle source

Allows adding multiple classification categories

Parameters

  • category_names - Array of categories

# File lib/omnicat/classifiers/strategy.rb, line 44
# Registers several categories by calling add_category once per entry.
#
# category_names - collection of category names, iterated with #each
#
# Returns whatever category_names.each returns.
def add_categories(category_names)
  category_names.each do |name|
    add_category(name)
  end
end
add_category(category_name) click to toggle source

Abstract method for adding new classification category

Parameters

  • category_name - Name for category

# File lib/omnicat/classifiers/strategy.rb, line 34
# Abstract hook for adding a classification category.
#
# This base version does no work of its own: it passes the name it was
# invoked under to not_implemented_error.
#
# category_name - name for the category (unused here)
def add_category(category_name)
  invoked_as = __callee__
  not_implemented_error(invoked_as)
end
classify(doc_content) click to toggle source

Abstract method for classifying the given document

Parameters

  • doc_content - The document for classification

Returns

# File lib/omnicat/classifiers/strategy.rb, line 102
# Abstract hook for classifying a single document.
#
# This base version does no work of its own: it passes the name it was
# invoked under to not_implemented_error.
#
# doc_content - the document to classify (unused here)
def classify(doc_content)
  invoked_as = __callee__
  not_implemented_error(invoked_as)
end
classify_batch(doc_contents) click to toggle source

Classify multiple documents at once

Parameters

  • doc_contents - Array of documents

Returns

  • Array of classification results, one per document, in input order

# File lib/omnicat/classifiers/strategy.rb, line 116
# Classifies every document by calling classify on each one.
#
# doc_contents - collection of documents
#
# Returns an Array holding the classify result for each document, in
# iteration order.
def classify_batch(doc_contents)
  doc_contents.map do |document|
    classify(document)
  end
end
train(category_name, doc_content) click to toggle source

Abstract method for training the desired category with a document

Parameters

  • category_name - Name of the category from added categories list

  • doc_content - Document text

# File lib/omnicat/classifiers/strategy.rb, line 55
# Abstract hook for training a category with one document.
#
# This base version does no work of its own: it passes the name it was
# invoked under to not_implemented_error.
#
# category_name - name of the category (unused here)
# doc_content   - document text (unused here)
def train(category_name, doc_content)
  invoked_as = __callee__
  not_implemented_error(invoked_as)
end
train_batch(category_name, doc_contents) click to toggle source

Train the desired category with multiple documents

Parameters

  • category_name - Name of the category from added categories list

  • doc_contents - Array of documents

# File lib/omnicat/classifiers/strategy.rb, line 66
# Trains one category with several documents by calling train per document.
#
# category_name - name of the category, passed unchanged to every train call
# doc_contents  - collection of documents, iterated with #each
#
# Returns whatever doc_contents.each returns.
def train_batch(category_name, doc_contents)
  doc_contents.each do |document|
    train(category_name, document)
  end
end
untrain(category_name, doc_content) click to toggle source

Abstract method for untraining the desired category with a document

Parameters

  • category_name - Name of the category from added categories list

  • doc_content - Document text

# File lib/omnicat/classifiers/strategy.rb, line 77
# Abstract hook for untraining a category with one document.
#
# This base version does no work of its own: it passes the name it was
# invoked under to not_implemented_error.
#
# category_name - name of the category (unused here)
# doc_content   - document text (unused here)
def untrain(category_name, doc_content)
  invoked_as = __callee__
  not_implemented_error(invoked_as)
end
untrain_batch(category_name, doc_contents) click to toggle source

Untrain the desired category with multiple documents

Parameters

  • category_name - Name of the category from added categories list

  • doc_contents - Array of documents

# File lib/omnicat/classifiers/strategy.rb, line 88
# Untrains one category with several documents by calling untrain per
# document.
#
# category_name - name of the category, passed unchanged to every untrain call
# doc_contents  - collection of documents, iterated with #each
#
# Returns whatever doc_contents.each returns.
def untrain_batch(category_name, doc_contents)
  doc_contents.each do |document|
    untrain(category_name, document)
  end
end

Protected Instance Methods

category_exists?(category_name) click to toggle source

nodoc

# File lib/omnicat/classifiers/strategy.rb, line 128
# Tells whether category_name is a key of the categories table.
#
# category_name - name to look up
#
# Returns true when the key is present, false otherwise.
def category_exists?(category_name)
  known_categories = categories
  known_categories.key?(category_name)
end
classifiable?() click to toggle source

nodoc

# File lib/omnicat/classifiers/strategy.rb, line 155
# Checks that the classifier is ready to classify.
#
# Returns true when category_count is at least 2 and doc_availability?
# reports that documents are available.
#
# Raises StandardError when category_count is below 2, or when
# doc_availability? is falsy. The method never returns false: every failing
# precondition is reported by raising.
def classifiable?
  if category_count < 2
    raise StandardError,
          'At least 2 categories needed for classification process!'
  end

  unless doc_availability?
    raise StandardError,
          'Each category must trained with at least one document!'
  end

  true
end
decrement_category_count() click to toggle source

nodoc

# File lib/omnicat/classifiers/strategy.rb, line 138
# Lowers @category_count by one.
#
# Returns the new count.
def decrement_category_count
  @category_count = @category_count - 1
end
decrement_doc_counts(category_name) click to toggle source

nodoc

# File lib/omnicat/classifiers/strategy.rb, line 149
# Lowers both the overall @doc_count and the doc_count of one category by one.
#
# category_name - key of the category in @categories
#
# Returns the category's new doc_count.
def decrement_doc_counts(category_name)
  # The overall counter is updated before the category lookup.
  @doc_count = @doc_count - 1
  category = @categories[category_name]
  category.doc_count = category.doc_count - 1
end
doc_availability?() click to toggle source

nodoc

# File lib/omnicat/classifiers/strategy.rb, line 170
# Reports whether every category in @categories has a non-zero doc_count.
#
# Returns false as soon as one category has a doc_count equal to 0,
# true otherwise (including when @categories is empty).
def doc_availability?
  @categories.none? { |_name, category| category.doc_count == 0 }
end
increment_category_count() click to toggle source

nodoc

# File lib/omnicat/classifiers/strategy.rb, line 133
# Raises @category_count by one.
#
# Returns the new count.
def increment_category_count
  @category_count = @category_count + 1
end
increment_doc_counts(category_name) click to toggle source

nodoc

# File lib/omnicat/classifiers/strategy.rb, line 143
# Raises both the overall @doc_count and the doc_count of one category by one.
#
# category_name - key of the category in @categories
#
# Returns the category's new doc_count.
def increment_doc_counts(category_name)
  # The overall counter is updated before the category lookup.
  @doc_count = @doc_count + 1
  category = @categories[category_name]
  category.doc_count = category.doc_count + 1
end

Private Instance Methods

not_implemented_error(method_name) click to toggle source

nodoc

# File lib/omnicat/classifiers/strategy.rb, line 122
# Raises NotImplementedError for a method that has no implementation.
#
# method_name - name of the missing method, interpolated into the message
#
# The message is "<receiver class name>#<method_name> method is not
# implemented!".
def not_implemented_error(method_name)
  message = "#{self.class.name}##{method_name} method is not implemented!"
  raise NotImplementedError, message
end