class Elastic::Stats::NaiveBayes::Set

A set of documents, identified by an index and a type, against which naive Bayes statistics are calculated.

Attributes

category_field[R]
index[R]
subject_field[R]
type[R]

Public Class Methods

new(index, type, category_field, subject_field)
# File lib/elastic/stats/naive-bayes/set.rb, line 13
# Binds the set to the documents it describes. All four values are stored
# as given and exposed through the readers of the same name.
#
# index::          passed as +index:+ when #analyze calls the indices API
# type::           kept alongside the index
# category_field:: field the terms aggregation buckets documents on
# subject_field::  field used when analyzing text and filtering by token
def initialize(index, type, category_field, subject_field)
  # Where the documents live.
  @index, @type = index, type

  # Which fields the statistics are computed over.
  @category_field = category_field
  @subject_field = subject_field
end

Public Instance Methods

analyze(options = {})
# File lib/elastic/stats/naive-bayes/set.rb, line 53
# Calls the client's indices analyze API for this set's index.
#
# options:: extra request parameters; merged over the default
#           <tt>{ index: index }</tt>, so a caller-supplied +:index+ wins
#
# Returns whatever the client's +indices.analyze+ call returns.
def analyze(options = {})
  request = { index: index }
  client.indices.analyze(request.merge(options))
end
categories()
# File lib/elastic/stats/naive-bayes/set.rb, line 25
# Per-category statistics for the set, as populated by +init_stats+.
#
# +init_stats+ is invoked only while <tt>@categories</tt> is still nil;
# afterwards the stored value is returned directly.
def categories
  return @categories unless @categories.nil?

  init_stats
  @categories
end
count()
# File lib/elastic/stats/naive-bayes/set.rb, line 20
# Document total for the set, as populated by +init_stats+.
#
# +init_stats+ is invoked only while <tt>@count</tt> is still nil;
# afterwards the stored value is returned directly.
def count
  return @count unless @count.nil?

  init_stats
  @count
end
token_categories()
# File lib/elastic/stats/naive-bayes/set.rb, line 34
# Lazily-filled lookup of per-category document counts for a token.
#
# <tt>token_categories[token]</tt> reads the +counts+ aggregation buckets
# from <tt>count_search[token]</tt> and caches them as a hash of
# bucket key => doc_count. The inner hash defaults to 0, so a category
# with no bucket for the token reads as zero.
#
# A token with no buckets at all yields an empty (still zero-defaulting)
# hash instead of raising.
def token_categories
  @token_categories ||= Hash.new do |cache, token|
    buckets = count_search[token]['aggregations']['counts']['buckets']

    # Fold directly into the zero-defaulting hash: reducing an empty bucket
    # list with :merge gives nil, and merging nil raises TypeError.
    cache[token] = buckets.each_with_object(Hash.new(0)) do |bucket, counts|
      counts[bucket['key']] = bucket['doc_count']
    end
  end
end
tokenize(subject)
# File lib/elastic/stats/naive-bayes/set.rb, line 43
# Splits +subject+ into tokens by running it through #analyze against
# +subject_field+.
#
# subject:: the text to analyze
#
# Returns the 'token' value of each entry under the response's 'tokens'
# key, in response order.
def tokenize(subject)
  analysis = analyze(field: subject_field, text: subject)
  analysis['tokens'].map { |entry| entry['token'] }
end
tokens()
# File lib/elastic/stats/naive-bayes/set.rb, line 30
# Lazily-filled lookup of hit totals per token.
#
# <tt>tokens[token]</tt> reads <tt>['hits']['total']</tt> from
# <tt>count_search[token]</tt> on first access and caches it under that
# token; later reads come from the cache.
def tokens
  @tokens ||= Hash.new do |cache, token|
    cache[token] = count_search[token]['hits']['total']
  end
end

Private Instance Methods

aggregation()
# File lib/elastic/stats/naive-bayes/set.rb, line 79
# Request fragment declaring a single terms aggregation, named +counts+,
# over +category_field+.
#
# Returns a fresh hash on every call.
def aggregation
  # We're assuming there's less than 200 categories
  terms = { field: category_field, size: 200 }

  { aggs: { counts: { terms: terms } } }
end
init_stats()
# File lib/elastic/stats/naive-bayes/set.rb, line 59
# Runs one search with the category aggregation and stores its results:
#
# * <tt>@count</tt>      - the response's +hits.total+
# * <tt>@categories</tt> - hash of bucket key => doc_count taken from the
#   +counts+ aggregation
#
# When the response has no buckets, <tt>@categories</tt> becomes an empty
# hash rather than nil, so the nil check in +categories+ does not repeat
# the search on every call.
#
# Returns the categories hash.
def init_stats
  # NOTE(review): the 'count' search type was deprecated in Elasticsearch 2.0
  # and removed in 5.0 - confirm which server version this targets.
  response = Hashie::Mash.new(
    search(search_type: 'count', body: aggregation)
  )

  @count = response.hits.total

  buckets = response.aggregations.counts.buckets
  # Build into a literal hash: reducing an empty list with :merge gives nil.
  @categories = buckets.each_with_object({}) do |bucket, totals|
    totals[bucket['key']] = bucket['doc_count']
  end
end
token_query(token)
# File lib/elastic/stats/naive-bayes/set.rb, line 94
def token_query(token)
  body = Hashie::Mash.new
  body.query!.filtered!.filter!.term!
  body.query.filtered.filter.term[subject_field] = token
  body.merge aggregation
end