class MiniHistogram

A class for building histogram info

Given an array, this class calculates the “edges” of a histogram. These edges mark the boundaries for “bins”.

array = [1,1,1, 5, 5, 5, 5, 10, 10, 10]
histogram = MiniHistogram.new(array)
puts histogram.edges
# => [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0]

It also finds the weights (aka count of values) that would go in each bin:

puts histogram.weights
# => [3, 0, 4, 0, 0, 3]

This means that the `array` here had three items between 0.0 and 2.0.

Plots the histogram in unicode characters

Thanks to github.com/red-data-tools/unicode_plot.rb. It could not be used directly because its dependency enumerable-statistics has a hard lock on a specific version of Ruby, and this library needs to support older Rubies.

Example:

require 'mini_histogram/plot'
array = 50.times.map { rand(11.2..11.6) }
histogram = MiniHistogram.new(array)
puts histogram.plot # => Generates a plot

Constants

INT64_MAX
INT64_MIN
VERSION

Attributes

array[R]
left_p[R]
max[R]

Public Class Methods

dual_plot() { |a, b| ... } click to toggle source
# File lib/mini_histogram/plot.rb, line 66
# Creates two PlotValue objects, yields them to the caller's block for
# configuration, and renders them together.
#
# Once the block returns, the second value's :ylabel option is set to nil
# when it equals the first value's :ylabel. Both values' histograms are then
# handed to MiniHistogram.set_average_edges! before plotting.
#
# Yields the two PlotValue instances, in creation order.
# Returns the result of PlotValue.dual_plot.
def self.dual_plot
  first, second = Array.new(2) { PlotValue.new }

  yield first, second

  second.options[:ylabel] = nil if second.options[:ylabel] == first.options[:ylabel]

  MiniHistogram.set_average_edges!(first.histogram, second.histogram)
  PlotValue.dual_plot(first.plot, second.plot)
end
new(array, left_p: true, edges: nil) click to toggle source
# File lib/mini_histogram.rb, line 24
# Stores the input data and options and records the data's extremes.
#
# array  - The collection of values to build the histogram from; it must
#          respond to #minmax.
# left_p - Stored as-is; #closed reports :left when it is truthy and :right
#          otherwise (default: true).
# edges  - Optional precomputed edges; when nil they are computed lazily
#          (default: nil).
def initialize(array, left_p: true, edges: nil)
  @weights = nil
  @edges = edges
  @left_p = left_p
  @array = array

  @min, @max = array.minmax
end
set_average_edges!(*array_of_histograms) click to toggle source

Given an array of histograms, this function calculates an average edge size along with the minimum and maximum edge values. It then updates the edge value on all inputs.

The main purpose of this method is to be able to chart multiple distributions against a similar axis.

See for more context: github.com/schneems/derailed_benchmarks/pull/169

# File lib/mini_histogram.rb, line 213
# Puts every given histogram on one shared set of edges.
#
# The shared bin width is the mean of the inputs' bin sizes. The shared edges
# start at the smallest edge of any input and advance by that width until the
# largest edge of any input is reached or passed. Each input is then updated
# in place (through #update_values) with those edges and with the largest
# non-nil `max` found among the inputs.
#
# array_of_histograms - Zero or more MiniHistogram instances.
#
# Returns the array of histograms that was passed in (empty when called with
# no arguments).
# Raises RuntimeError if any argument is not a MiniHistogram.
# Raises ArgumentError if the inputs span more than one edge value while
# their average bin size is not positive, since no finite list of edges
# could cover that range.
def self.set_average_edges!(*array_of_histograms)
  array_of_histograms.each { |x| raise "Input expected to be a histogram but is #{x.inspect}" unless x.is_a?(MiniHistogram) }

  # Nothing to average; the arithmetic below would otherwise operate on nil.
  return array_of_histograms if array_of_histograms.empty?

  steps = array_of_histograms.map(&:bin_size)
  avg_step_size = steps.inject(:+).to_f / steps.length

  # A histogram may report a nil max; skip those so that comparing nil with
  # a number does not raise.
  max_value = array_of_histograms.map(&:max).compact.max

  max_edge = array_of_histograms.map(&:edges_max).max
  min_edge = array_of_histograms.map(&:edges_min).min

  # With a zero, negative or NaN step the loop below could never reach
  # max_edge, so fail loudly instead of spinning forever.
  if min_edge < max_edge && !(avg_step_size > 0)
    raise ArgumentError, "Cannot build shared edges: average bin size must be positive but is #{avg_step_size}"
  end

  average_edges = [min_edge]
  while average_edges.last < max_edge
    average_edges << average_edges.last + avg_step_size
  end

  array_of_histograms.each { |h| h.update_values(edges: average_edges, max: max_value) }

  array_of_histograms
end

Public Instance Methods

bin_size() click to toggle source
# File lib/mini_histogram.rb, line 57
# Width of one bin, taken as the gap between the first two edges.
#
# Returns 0 when there are fewer than two edges.
def bin_size
  boundaries = edges
  return 0 unless boundaries.length > 1

  boundaries[1] - boundaries[0]
end
closed() click to toggle source
# File lib/mini_histogram.rb, line 45
# Reports which side of each bin is closed, based on @left_p.
#
# Returns :left when @left_p is truthy, :right otherwise.
def closed
  return :left if @left_p

  :right
end
edge()
Alias for: edges
edges() click to toggle source

Finds the “edges” of a given histogram that will mark the boundaries for the histogram's “bins”

Example:

a = [1,1,1, 5, 5, 5, 5, 10, 10, 10]
MiniHistogram.new(a).edges
# => [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0]

There are multiple ways to find edges, this was taken from
https://github.com/mrkn/enumerable-statistics/issues/24

Another good set of implementations is in numpy
https://github.com/numpy/numpy/blob/d9b1e32cb8ef90d6b4a47853241db2a28146a57d/numpy/lib/histograms.py#L222
# File lib/mini_histogram.rb, line 122
# Finds the "edges" that mark the boundaries of this histogram's bins.
#
# A value that was supplied or computed earlier is returned as-is. Otherwise
# the bin count from #sturges gives a raw bin width, which is snapped to
# 1, 2, 5 or 10 times a power of ten. The first edge is aligned to a multiple
# of that width and edges are appended until the data range is covered. The
# result is memoized in @edges.
#
# Edges are represented as (start + i * step) / divisor so that widths below
# one can be stepped with step = 1.0 and scaled back down by `divisor`.
#
# Returns an Array of edge values ([0.0] when the input array is empty).
def edges
  return @edges if @edges

  return @edges = [0.0] if array.empty?

  lo = @min
  hi = @max

  nbins = sturges.to_f

  if hi == lo
    # All values are identical: use unit-width bins starting at that value.
    start = lo
    step = 1.0
    divisor = 1.0
    len = 1
  else
    bw = (hi - lo) / nbins
    lbw = Math.log10(bw)
    if lbw >= 0
      # Raw width >= 1: round the step up to 1, 2, 5 or 10 times a power of ten.
      step = 10 ** lbw.floor * 1.0
      r = bw/step

      if r <= 1.1
        # do nothing
      elsif r <= 2.2
        step *= 2.0
      elsif r <= 5.5
        step *= 5.0
      else
        step *= 10
      end
      divisor = 1.0
      start = step * (lo/step).floor
      len = ((hi - start)/step).ceil
    else
      # Raw width < 1: keep step at 1.0 and shrink through the divisor instead.
      # The divisor must be a Float: `start` is an Integer here, so an Integer
      # divisor would make `start/divisor` below an integer division and
      # truncate the first edge (e.g. 2/10 => 0 instead of 0.2).
      divisor = (10 ** -lbw.floor).to_f
      r = bw * divisor
      if r <= 1.1
        # do nothing
      elsif r <= 2.2
        divisor /= 2.0
      elsif r <= 5.5
        divisor /= 5.0
      else
        divisor /= 10.0
      end
      step = 1.0
      start = (lo * divisor).floor
      len = (hi * divisor - start).ceil
    end
  end

  if left_p
    # Left-closed bins: the first edge must be <= lo and the last edge > hi.
    while (lo < start/divisor)
      start -= step
    end

    while (start + (len - 1)*step)/divisor <= hi
      len += 1
    end
  else
    # Right-closed bins: the first edge must be < lo and the last edge >= hi.
    while lo <= start/divisor
      start -= step
    end
    while (start + (len - 1)*step)/divisor < hi
      len += 1
    end
  end

  @edges = []
  len.times.each do
    @edges << start/divisor
    start += step
  end

  return @edges
end
Also aliased as: edge
edges_max() click to toggle source
# File lib/mini_histogram.rb, line 37
# Returns the largest value in #edges.
def edges_max
  edges.max
end
edges_min() click to toggle source
# File lib/mini_histogram.rb, line 33
# Returns the smallest value in #edges.
def edges_min
  edges.min
end
histogram(*_) click to toggle source
# File lib/mini_histogram.rb, line 41
# Returns the receiver itself; any arguments are accepted and ignored.
def histogram(*_)
  self
end
plot() click to toggle source
# File lib/mini_histogram.rb, line 201
# Placeholder that always fails, telling the caller which file to require.
# NOTE(review): presumably 'mini_histogram/plot' replaces this method with a
# working implementation once loaded - confirm against that file.
#
# Raises RuntimeError unconditionally.
def plot
  raise RuntimeError, "You must `require 'mini_histogram/plot'` to get this feature"
end
sturges() click to toggle source

Weird name, right? There are multiple ways to calculate the number of “bins” a histogram should have, one of the most common is the “sturges” method

Here are some alternatives from numpy: github.com/numpy/numpy/blob/d9b1e32cb8ef90d6b4a47853241db2a28146a57d/numpy/lib/histograms.py#L489-L521

# File lib/mini_histogram.rb, line 69
# Suggests a number of bins for the stored array using the "sturges" rule:
# ceil(log2(n)) + 1, where n is the number of values.
#
# Returns 1.0 for an empty array, otherwise an Integer bin count.
def sturges
  count = array.length

  count.zero? ? 1.0 : Math.log2(count).ceil + 1
end
update_values(edges:, max: ) click to toggle source

Sets the edge value to something new, also clears any previously calculated values

# File lib/mini_histogram.rb, line 51
# Replaces the stored edges and max, and drops the memoized weights so they
# are recomputed on next use.
#
# edges - The new edges to store.
# max   - The new maximum to store.
#
# Returns nil.
def update_values(edges:, max:)
  @weights = nil # counts computed for the old edges are no longer valid
  @max = max
  @edges = edges
  nil
end
weights() click to toggle source

Given an array of edges and the array we want to generate a histogram from, return the counts for each “bin”

Example:

a = [1,1,1, 5, 5, 5, 5, 10, 10, 10]
edges = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0]

MiniHistogram.new(a).weights
# => [3, 0, 4, 0, 0, 3]

This means that the `a` array has three values between 0.0 and 2.0,
four values between 4.0 and 6.0, and three values between 10.0 and 12.0.
# File lib/mini_histogram.rb, line 90
# Counts how many values of the stored array fall into each bin.
#
# A value's bin is floor((value - first_edge) / width), where width is the
# gap between the first two edges. The result has one slot for every bin up
# to and including the bin that holds @max. The result is memoized in
# @weights.
#
# Returns an Array of Integer counts ([] when the array is empty).
def weights
  return @weights if @weights
  return @weights = [] if array.empty?

  origin = edges.first
  width = edges[1] - edges[0]
  bin_of = ->(value) { ((value - origin) / width).floor }

  @weights = Array.new(bin_of.call(@max) + 1, 0)
  array.each { |value| @weights[bin_of.call(value)] += 1 }

  @weights
end

Private Instance Methods

barplot( *args, width: 40, color: :green, symbol: "■", border: :barplot, xscale: nil, xlabel: nil, data: nil, **kw) click to toggle source

Begin copy/pasta from unicode_plot.rb with some slight modifications

# File lib/mini_histogram/plot.rb, line 125
        # Builds a MiniUnicodePlot::Barplot and labels each of its rows.
        #
        # The bars come either from two positional arguments (labels, then
        # values) or, when no positional arguments are given, from `data`,
        # whose keys (converted to strings) are the labels and whose values
        # are the bar values.
        #
        # width, color, symbol and xscale are passed positionally to
        # Barplot.new; border, xlabel and any extra keywords are forwarded as
        # keyword arguments. When xlabel is not given it is derived from
        # ValueTransformer.transform_name(xscale).
        #
        # Returns the Barplot instance.
        # Raises ArgumentError for any other number of positional arguments,
        # when labels and values differ in length, or when the smallest value
        # is below zero.
        def barplot(
          *args,
          width: 40,
          color: :green,
          symbol: "■",
          border: :barplot,
          xscale: nil,
          xlabel: nil,
          data: nil,
          **kw)
          if args.length == 0
            data = Hash(data)
            labels = data.keys.map(&:to_s)
            values = data.values
          elsif args.length == 2
            labels = Array(args[0])
            values = Array(args[1])
          else
            raise ArgumentError, "invalid arguments"
          end

          raise ArgumentError, "The given vectors must be of the same length" unless labels.length == values.length
          raise ArgumentError, "All values have to be positive. Negative bars are not supported." unless values.min >= 0

          xlabel ||= ValueTransformer.transform_name(xscale)
          forwarded = { border: border, xlabel: xlabel }.merge(kw)
          plot = MiniUnicodePlot::Barplot.new(values, width, color, symbol, xscale, **forwarded)

          # Annotate the left side (:l) of each row with its label.
          labels.each_with_index { |label, row| plot.annotate_row!(:l, row, label) }

          plot
        end
ceil_neg_log10(x) click to toggle source
# File lib/mini_histogram/plot.rb, line 175
        # Converts -log10(x) to an Integer: via #ceil when roundable? accepts
        # the value, via #floor otherwise.
        #
        # x - The number whose negated base-10 logarithm is taken.
        #
        # Returns an Integer.
        def ceil_neg_log10(x)
          neg_log = -Math.log10(x)

          roundable?(neg_log) ? neg_log.ceil : neg_log.floor
        end
float_round_log10(x, m) click to toggle source
# File lib/mini_histogram/plot.rb, line 165
        # Rounds x to ceil_neg_log10(m) + 1 digits and returns it as a Float.
        # Negative inputs are rounded by magnitude and then negated.
        #
        # x - The number to round.
        # m - The number passed to ceil_neg_log10 to pick the digit count.
        #
        # Returns a Float (0.0 when x equals zero, without consulting m).
        def float_round_log10(x, m)
          return 0.0 if x == 0
          return x.round(ceil_neg_log10(m) + 1).to_f if x > 0

          -(-x).round(ceil_neg_log10(m) + 1).to_f
        end
roundable?(x) click to toggle source
# File lib/mini_histogram/plot.rb, line 185
        # Tells whether x has no fractional part and satisfies
        # INT64_MIN <= x < INT64_MAX.
        #
        # x - The number to test.
        #
        # Returns true or false.
        def roundable?(x)
          whole = x.to_i == x

          whole && INT64_MIN <= x && x < INT64_MAX
        end