class Elastic::Stats::NaiveBayes::Set
A set of documents against which statistics will be calculated
Attributes
category_field[R]
index[R]
subject_field[R]
type[R]
Public Class Methods
new(index, type, category_field, subject_field)
click to toggle source
# File lib/elastic/stats/naive-bayes/set.rb, line 13
# Stores the four values that describe this document set. Each one is kept
# verbatim in the instance variable of the same name.
#
# index          - sent as :index by the search and analyze helpers
# type           - sent as :type by the search helper
# category_field - field the terms aggregation buckets on
# subject_field  - field used for the term filter and for analysis
def initialize(index, type, category_field, subject_field)
  @index, @type = index, type
  @category_field, @subject_field = category_field, subject_field
end
Public Instance Methods
analyze(options = {})
click to toggle source
# File lib/elastic/stats/naive-bayes/set.rb, line 53
# Forwards an analyze request to client.indices.analyze.
#
# options - Hash merged over { index: index }; because it is merged last,
#           a caller-supplied :index replaces the set's own index.
#
# Returns whatever client.indices.analyze returns.
def analyze(options = {})
  request = { index: index }.merge(options)
  client.indices.analyze(request)
end
categories()
click to toggle source
# File lib/elastic/stats/naive-bayes/set.rb, line 25
# Returns the cached @categories value, calling init_stats first when it
# has not been populated yet (i.e. while @categories is still nil).
def categories
  return @categories unless @categories.nil?

  init_stats
  @categories
end
count()
click to toggle source
# File lib/elastic/stats/naive-bayes/set.rb, line 20
# Returns the cached @count value, calling init_stats first when it has
# not been populated yet (i.e. while @count is still nil).
def count
  return @count unless @count.nil?

  init_stats
  @count
end
search(options = {})
click to toggle source
Elasticsearch client helper methods
# File lib/elastic/stats/naive-bayes/set.rb, line 49
# Forwards a search request to client.search.
#
# options - Hash merged over { index: index, type: type }; because it is
#           merged last, caller-supplied :index / :type values win.
#
# Returns whatever client.search returns.
def search(options = {})
  request = { index: index, type: type }.merge(options)
  client.search(request)
end
token_categories()
click to toggle source
# File lib/elastic/stats/naive-bayes/set.rb, line 34
# Lazily-built lookup of per-category document counts for a token.
#
# Returns a memoized Hash whose default block, on first access of a token,
# reads the 'counts' aggregation buckets from count_search[token] and
# stores a Hash of bucket 'key' => bucket 'doc_count'. Each stored Hash
# defaults to 0 for categories that have no bucket.
def token_categories
  @token_categories ||= Hash.new do |cache, token|
    buckets = count_search[token]['aggregations']['counts']['buckets']

    # Fill the zero-default hash directly. Folding the buckets with
    # reduce(:merge) yields nil for an empty bucket list (a token that
    # matches no documents), and Hash#merge(nil) raises TypeError.
    cache[token] = buckets.each_with_object(Hash.new(0)) do |bucket, counts|
      counts[bucket['key']] = bucket['doc_count']
    end
  end
end
tokenize(subject)
click to toggle source
# File lib/elastic/stats/naive-bayes/set.rb, line 43
# Runs the given text through analyze, using subject_field as the :field
# option, and extracts the token strings from the response.
#
# subject - text passed as the :text option
#
# Returns an Array holding the 'token' value of each entry under 'tokens'.
def tokenize(subject)
  analysis = analyze(field: subject_field, text: subject)
  analysis['tokens'].map { |entry| entry['token'] }
end
tokens()
click to toggle source
# File lib/elastic/stats/naive-bayes/set.rb, line 30
# Lazily-built lookup of total hit counts per token.
#
# Returns a memoized Hash whose default block stores
# count_search[token]['hits']['total'] the first time a token is read.
def tokens
  @tokens ||= Hash.new do |cache, token|
    cache[token] = count_search[token]['hits']['total']
  end
end
Private Instance Methods
aggregation()
click to toggle source
# File lib/elastic/stats/naive-bayes/set.rb, line 79
# Builds the request fragment for a terms aggregation named :counts that
# buckets on category_field.
#
# Returns a nested Hash of the form { aggs: { counts: { terms: {...} } } }.
def aggregation
  # We're assuming there's less than 200 categories
  terms = { field: category_field, size: 200 }
  { aggs: { counts: { terms: terms } } }
end
count_search()
click to toggle source
# File lib/elastic/stats/naive-bayes/set.rb, line 73
# Lazily-built cache of search responses keyed by token.
#
# Returns a memoized Hash whose default block, on first access of a token,
# runs search with search_type 'count' and token_query(token) as the body,
# then stores the response under that token.
def count_search
  @count_search ||= Hash.new do |cache, token|
    cache[token] = search(search_type: 'count', body: token_query(token))
  end
end
init_stats()
click to toggle source
# File lib/elastic/stats/naive-bayes/set.rb, line 59
# Runs one 'count' search with the category aggregation and caches two
# values from the response:
#
#   @count      - results.hits.total
#   @categories - Hash of bucket 'key' => bucket 'doc_count'
#
# Returns the @categories Hash.
def init_stats
  results = Hashie::Mash.new(search(search_type: 'count', body: aggregation))
  @count = results.hits.total

  # Build the Hash directly so an empty bucket list gives {} rather than
  # nil. Folding single-entry hashes with reduce(:merge) returns nil when
  # there are no buckets, which made #categories return nil and re-run
  # this search on every call.
  @categories = results.aggregations.counts.buckets.each_with_object({}) do |bucket, totals|
    totals[bucket['key']] = bucket['doc_count']
  end
end
token_query(token)
click to toggle source
# File lib/elastic/stats/naive-bayes/set.rb, line 94
# Builds the search body for a single token: a filtered query whose term
# filter matches subject_field against the token, merged with the
# category aggregation.
#
# token - value placed under the subject_field key of the term filter
#
# Returns a Hashie::Mash (the result of Mash#merge with aggregation).
def token_query(token)
  # Build the nested structure from a literal instead of the bang chain
  # query!.filtered!.filter!.term!. Mash subclasses Hash, and Ruby 2.6+
  # defines Hash#filter and Hash#filter!, so `.filter!` / `.filter` there
  # return an Enumerator instead of the nested Mash, and the chain fails
  # with NoMethodError.
  body = Hashie::Mash.new(
    query: { filtered: { filter: { term: { subject_field => token } } } }
  )
  body.merge(aggregation)
end