class Elastic::Stats::KS
Utility to determine the Kolmogorov-Smirnov difference between two sets of data fetched from Elasticsearch.
Constants
- MULTIPLIERS
Attributes
field[R]
from[R]
indices[R]
interval[R]
logger[RW]
query[RW]
span[R]
to[R]
Public Class Methods
new(indices, options = {})
click to toggle source
indices should include all possible indices.
# File lib/elastic/stats/ks.rb, line 25
# Builds a comparison over the given indices.
#
# indices - a single index or an Array of indices; a non-Array value is
#           wrapped in a one-element Array.
# options - Hash merged over default_options. Recognised keys are :to,
#           :span, :interval, :field and :offset.
#
# @from is derived as @to - @span.
def initialize(indices, options = {})
  # default_options returns a fresh Hash, so updating it in place never
  # touches the caller's options.
  settings = default_options.update(options)

  @indices = indices.is_a?(Array) ? indices : [indices]
  @to, @span, @interval, @field, @offset =
    settings.values_at(:to, :span, :interval, :field, :offset)
  @from = @to - @span
end
Public Instance Methods
calculate(current, previous, confidence)
click to toggle source
# File lib/elastic/stats/ks.rb, line 55
# Computes the threshold that #fetch compares the Kolmogorov-Smirnov
# statistic against:
#
#   MULTIPLIERS[confidence] * sqrt((n + m) / (n * m))
#
# where n and m are current.count and previous.count.
#
# current    - collection responding to #count.
# previous   - collection responding to #count.
# confidence - key looked up in MULTIPLIERS.
#
# Returns the product of the multiplier and the square root (a Float for
# numeric multipliers). When either count is zero the Float division yields
# Infinity or NaN rather than raising.
#
# Raises ArgumentError when MULTIPLIERS has no entry for +confidence+
# (previously this surfaced as a NoMethodError on nil).
def calculate(current, previous, confidence)
  multiplier = MULTIPLIERS.fetch(confidence) do
    raise ArgumentError,
          "unsupported confidence #{confidence.inspect}; " \
          "expected one of #{MULTIPLIERS.keys.inspect}"
  end

  current_size = current.count
  previous_size = previous.count
  ratio = (current_size + previous_size).to_f / (current_size * previous_size)

  multiplier * Math.sqrt(ratio)
end
fetch(confidence = 0.05)
click to toggle source
# File lib/elastic/stats/ks.rb, line 40
# Compares the window @from..@to with the same window shifted back by
# @offset, using Statsample's Kolmogorov-Smirnov test.
#
# confidence - passed through to #calculate (default: 0.05).
#
# Returns a Hash with:
#   :confidence - the value passed in
#   :comparison - the threshold returned by #calculate
#   :difference - the `d` value of the Kolmogorov-Smirnov test
#   :different? - true when :difference exceeds :comparison
def fetch(confidence = 0.05)
  recent = range(@from, @to)
  baseline = range(@from - @offset, @to - @offset)

  ks_test = Statsample::Test::KolmogorovSmirnov.new(recent, baseline)
  statistic = ks_test.d
  threshold = calculate(recent, baseline, confidence)

  {
    confidence: confidence,
    comparison: threshold,
    difference: statistic,
    different?: statistic > threshold
  }
end
Private Instance Methods
aggregate(from, to)
click to toggle source
# File lib/elastic/stats/ks.rb, line 81
# Builds the date_histogram settings used by #body.
#
# from - lower bound; multiplied by 1000 for extended_bounds[:min].
# to   - upper bound; multiplied by 1000 for extended_bounds[:max].
#
# NOTE(review): the default :to is Time#to_i (epoch seconds), so the * 1000
# presumably converts to epoch milliseconds for Elasticsearch - confirm.
#
# Returns a Hash with :field, :interval, :min_doc_count and :extended_bounds.
def aggregate(from, to)
  histogram = { field: field, interval: interval, min_doc_count: 0 }
  histogram[:extended_bounds] = { min: from * 1000, max: to * 1000 }
  histogram
end
body(from, to)
click to toggle source
# File lib/elastic/stats/ks.rb, line 72
# Assembles the search request body as a Hashie::Mash.
#
# The query attribute is included only when it is set. The date_histogram
# from #aggregate is stored under aggregations.hits_per_minute.
#
# from - lower bound forwarded to #aggregate.
# to   - upper bound forwarded to #aggregate.
#
# Returns the populated Hashie::Mash.
def body(from, to)
  Hashie::Mash.new.tap do |request|
    request.query = query if query
    # The bang accessors create the nested Mash levels on demand.
    request.aggregations!.hits_per_minute!.date_histogram = aggregate(from, to)
  end
end
default_options()
click to toggle source
# File lib/elastic/stats/ks.rb, line 93
# Default settings merged under the options given to #initialize.
#
# Returns a new Hash on every call:
#   :to       - the current time as epoch seconds
#   :span     - 12 hours, in seconds
#   :interval - '1h'
#   :field    - '@timestamp'
#   :offset   - 7 days, in seconds
def default_options
  twelve_hours = 12 * 60 * 60
  one_week = 7 * 24 * 60 * 60

  {
    to: Time.now.to_i,
    span: twelve_hours,
    interval: '1h',
    field: '@timestamp',
    offset: one_week
  }
end
range(from, to)
click to toggle source
# File lib/elastic/stats/ks.rb, line 66
# Searches the comma-joined indices with the request from #body and
# collects the doc_count of every bucket in the hits_per_minute aggregation.
#
# from - lower bound forwarded to #body.
# to   - upper bound forwarded to #body.
#
# Returns an Array with one doc_count per bucket.
def range(from, to)
  response = client.search(index: indices.join(','), body: body(from, to))
  buckets = Hashie::Mash.new(response).aggregations.hits_per_minute.buckets
  buckets.map(&:doc_count)
end