module Rust::Descriptive

Public Class Methods

mean(data) click to toggle source
# File lib/rust-descriptive.rb, line 7
# Computes the arithmetic mean of +data+.
#
# data:: Array whose elements are all Numeric.
#
# Returns the sum of the elements, converted with +to_f+, divided by the
# number of elements. An empty Array therefore yields NaN (0.0 / 0).
# Raises TypeError when +data+ is not an Array or holds a non-Numeric element.
def mean(data)
    well_formed = data.is_a?(Array) && data.all? { |value| value.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics" unless well_formed

    total = data.sum
    total.to_f / data.size
end
median(data) click to toggle source
# File lib/rust-descriptive.rb, line 30
# Computes the median of +data+.
#
# data:: Array whose elements are all Numeric.
#
# Returns NaN for an empty Array, the middle element of the sorted values
# (unchanged, so it keeps its own numeric class) when the count is odd, and
# the two middle elements summed and divided by 2.0 when the count is even.
# Raises TypeError when +data+ is not an Array or holds a non-Numeric element.
def median(data)
    unless data.is_a?(Array) && data.all? { |value| value.is_a?(Numeric) }
        raise TypeError, "Expecting Array of numerics"
    end

    count = data.size
    return Float::NAN if count.zero?

    ordered = data.sort
    middle = count / 2
    return ordered[middle] if count.odd?

    (ordered[middle - 1] + ordered[middle]) / 2.0
end
outliers(data, k=1.5, **opts) click to toggle source
# File lib/rust-descriptive.rb, line 82
# Finds the outliers of +data+, using +data+ itself as the distribution that
# is handed to outliers_according_to.
#
# data:: values to screen; also passed as the reference distribution.
# k::    forwarded unchanged to outliers_according_to (default 1.5).
# opts:: keyword options forwarded unchanged to outliers_according_to.
#
# Returns whatever outliers_according_to returns.
def outliers(data, k=1.5, **opts)
    reference = data
    return outliers_according_to(data, reference, k, **opts)
end
outliers_according_to(data, data_distribution, k=1.5, **opts) click to toggle source
# File lib/rust-descriptive.rb, line 86
# Returns the elements of +data+ lying outside the fences derived from
# +data_distribution+.
#
# The 0.25 and 0.75 quantiles (q1, q3) of +data_distribution+ are obtained
# from Rust::Descriptive.quantile. With iqr = q3 - q1, an element is a
# positive outlier when it is greater than q3 + iqr * k, and a negative
# outlier when it is less than q1 - iqr * k.
#
# data::              values to screen.
# data_distribution:: values whose quartiles define the fences.
# k::                 multiplier applied to the interquartile range
#                     (default 1.5).
# opts::              optional :side restricting the result. :positive, :pos,
#                     :p and :+ keep only the positive outliers; :negative,
#                     :neg, :n and :- keep only the negative ones. The value
#                     is converted with +to_sym+, so Strings work too. Any
#                     other value, or no :side, returns both sides.
#
# Returns an Array: negative outliers followed by positive ones when both
# sides are requested, each group in the order it appears in +data+.
def outliers_according_to(data, data_distribution, k=1.5, **opts)
    quantiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
    q1 = quantiles[0.25]
    q3 = quantiles[0.75]
    iqr = q3 - q1

    # The fences do not depend on the element, so compute them once.
    upper_fence = q3 + iqr * k
    lower_fence = q1 - iqr * k

    positive_outliers = data.select { |d| d > upper_fence }
    negative_outliers = data.select { |d| d < lower_fence }

    return negative_outliers + positive_outliers unless opts[:side]

    # Every short alias must select the side its long name abbreviates:
    # :pos/:p/:+ are positive, :neg/:n/:- are negative.
    case opts[:side].to_sym
    when :positive, :pos, :p, :+
        positive_outliers
    when :negative, :neg, :n, :-
        negative_outliers
    else
        # Unrecognised side: fall back to reporting both sides.
        negative_outliers + positive_outliers
    end
end
quantile(data, percentiles=[0.0, 0.25, 0.5, 0.75, 1.0]) click to toggle source
# File lib/rust-descriptive.rb, line 50
# Computes the requested quantiles of +data+ by linear interpolation between
# neighbouring sorted values.
#
# data::        Array whose elements are all Numeric.
# percentiles:: Array of Numeric values, each within 0..1
#               (default [0.0, 0.25, 0.5, 0.75, 1.0]).
#
# For each percentile p the fractional position in the sorted data is
# max(n - 1, 0) * p. When that position falls strictly between two indices
# holding different values, the result is the weighted average of the two;
# otherwise it is the value at the lower index, returned unchanged. With
# empty +data+ every percentile maps to nil.
#
# Returns a Hash mapping each percentile to its quantile.
# Raises TypeError when either argument is not an Array of Numeric values,
# and RuntimeError when a percentile lies outside 0..1.
def quantile(data, percentiles=[0.0, 0.25, 0.5, 0.75, 1.0])
    unless data.is_a?(Array) && data.all? { |value| value.is_a?(Numeric) }
        raise TypeError, "Expecting Array of numerics"
    end
    unless percentiles.is_a?(Array) && percentiles.all? { |value| value.is_a?(Numeric) }
        raise TypeError, "Expecting Array of numerics"
    end
    unless percentiles.all? { |value| value.between?(0, 1) }
        raise "Percentiles outside the range: #{percentiles}"
    end

    ordered = data.sort
    last_index = [ordered.size - 1, 0].max

    values = percentiles.map do |percentile|
        # Kept as "1 + ... - 1": floating-point rounding can make this differ
        # from the plain product, which would shift the interpolation weights.
        position = 1 + last_index * percentile - 1
        lower_index = position.floor
        lower_value = ordered[lower_index]
        upper_value = ordered[position.ceil]

        if position > lower_index && upper_value != lower_value
            weight = position - lower_index
            (1 - weight) * lower_value + weight * upper_value
        else
            lower_value
        end
    end

    percentiles.zip(values).to_h
end
sd(data)
Alias for: standard_deviation
standard_deviation(data) click to toggle source
# File lib/rust-descriptive.rb, line 13
# Computes the standard deviation of +data+ as the square root of
# variance(data).
#
# data:: Array whose elements are all Numeric.
#
# Returns the result of Math.sqrt applied to the variance.
# Raises TypeError when +data+ is not an Array or holds a non-Numeric element.
def standard_deviation(data)
    well_formed = data.is_a?(Array) && data.all? { |value| value.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics" unless well_formed

    spread = variance(data)
    Math.sqrt(spread)
end
Also aliased as: sd, stddev
stddev(data)
Alias for: standard_deviation
sum(data) click to toggle source
# File lib/rust-descriptive.rb, line 44
# Adds up the elements of +data+.
#
# data:: Array whose elements are all Numeric.
#
# Returns the result of Array#sum on +data+ (0 for an empty Array).
# Raises TypeError when +data+ is not an Array or holds a non-Numeric element.
def sum(data)
    unless data.is_a?(Array) && data.all? { |value| value.is_a?(Numeric) }
        raise TypeError, "Expecting Array of numerics"
    end

    data.sum
end
var(data)
Alias for: variance
variance(data) click to toggle source
# File lib/rust-descriptive.rb, line 21
# Computes the sample variance of +data+: the sum of squared deviations from
# mean(data), divided by (count - 1).
#
# data:: Array whose elements are all Numeric.
#
# Returns NaN when +data+ has fewer than two elements; otherwise the variance,
# with the sum of squares converted via +to_f+ before dividing.
# Raises TypeError when +data+ is not an Array or holds a non-Numeric element.
def variance(data)
    unless data.is_a?(Array) && data.all? { |value| value.is_a?(Numeric) }
        raise TypeError, "Expecting Array of numerics"
    end

    count = data.size
    return Float::NAN if count < 2

    center = mean(data)
    squared_deviations = data.map { |value| (value - center) ** 2 }
    squared_deviations.sum.to_f / (count - 1)
end
Also aliased as: var