module Lttb
Constants
- VERSION
Public Class Methods
largest_triangle_three_buckets(data, threshold, options = {})
click to toggle source
Return a downsampled version of data. Parameters
data: list of lists/tuples
data must be formatted this way: [[x,y], [x,y], [x,y], ...] or: [(x,y), (x,y), (x,y), ...]
threshold: int
threshold must be an integer greater than 2; if it is greater than or equal to the length of data, data is returned unchanged
Returns
data, but downsampled using threshold
# File lib/lttb/process.rb, line 14
#
# Return a downsampled version of data.
#
# The first and last points are always kept. For every bucket in between, the
# point kept is the one forming the largest triangle with the previously kept
# point and the average of the following bucket.
#
# Parameters
#   data:      array of points, [[x, y], [x, y], ...]
#   threshold: number of points to keep
#   options:   :check_tuples - when truthy, every point is validated with check_tuples
#              :dates        - when truthy, x values are converted with handle_dates
#                              before sampling and converted back with as_dates
#
# Returns
#   data itself (untouched) when threshold >= data.size, otherwise a new array
#   holding the sampled points.
#
# Raises whatever check_data, check_threshold and check_tuples raise for
# invalid input.
def self.largest_triangle_three_buckets(data, threshold, options = {})
  # Check if data and threshold are valid
  check_data(data)
  check_threshold(threshold)
  check_tuples(data) if options[:check_tuples]

  data_length = data.size

  # Nothing to do? Decide this BEFORE any date conversion, so the caller gets
  # the input back exactly as it was passed in (dates stay dates).
  return data if threshold >= data_length || threshold.zero?

  # The date helpers may rewrite x in place; hand them shallow copies of the
  # points so the caller's own arrays are never modified.
  data = handle_dates(data.map(&:dup)) if options[:dates]

  # Bucket size. Leave room for start and end data points
  every = (data_length - 2).fdiv(threshold - 2)

  a = 0                 # index of the previously selected point (first corner)
  next_a = 0
  max_area_point = nil  # kept outside the loop: carried over if a bucket yields no pick
  sampled = [data[a]]   # Always add the first point

  bucket_start = 1
  bucket_mid = every.floor + 1

  (threshold - 2).times do |i|
    bucket_end = ((i + 2) * every).floor + 1

    # Average point of the next bucket (third corner of the triangle).
    # The last bucket is clamped to the end of the data.
    avg_to = [bucket_end, data_length].min
    avg_count = avg_to - bucket_mid
    avg_x = 0
    avg_y = 0
    (bucket_mid...avg_to).each do |j|
      avg_x += data[j][0]
      avg_y += data[j][1]
    end
    avg_x = avg_x.fdiv(avg_count)
    avg_y = avg_y.fdiv(avg_count)

    # Point a
    point_ax = data[a][0]
    point_ay = data[a][1]

    max_area = -1

    (bucket_start...bucket_mid).each do |j|
      # Triangle area over three buckets
      area = ((point_ax - avg_x) * (data[j][1] - point_ay) -
              (point_ax - data[j][0]) * (avg_y - point_ay)).abs * 0.5
      next unless area > max_area # strict: the first of equal areas wins

      max_area = area
      max_area_point = data[j]
      next_a = j # Next a is this b
    end

    sampled << max_area_point # Pick this point from the bucket
    a = next_a                # This a is the next a (chosen b)

    # move the bucket by one step (every)
    bucket_start = bucket_mid
    bucket_mid = bucket_end
  end

  sampled << data[data_length - 1] # Always add last

  options[:dates] ? as_dates(sampled) : sampled
end
Also aliased as: process
Private Class Methods
as_dates(data)
click to toggle source
# File lib/lttb/process.rb, line 139
#
# Convert the x value (index 0) of every point back into a DateTime.
#
# Each x is rendered with to_s and parsed with the '%Q' directive, the same
# directive handle_dates uses to produce the integer.
#
# Returns a new array of converted copies; the points passed in are left
# untouched, so frozen or shared points are safe to pass.
def as_dates(data)
  data.map do |point|
    restored = point.dup
    restored[0] = DateTime.strptime(point[0].to_s, '%Q')
    restored
  end
end
check_data(data)
click to toggle source
# File lib/lttb/process.rb, line 116
#
# Guard: data has to be an Array; anything else raises LttbException.
def check_data(data)
  return if data.is_a?(Array)

  raise LttbException, 'data is not an array'
end
check_threshold(threshold)
click to toggle source
# File lib/lttb/process.rb, line 120
#
# Guard: threshold has to be an Integer strictly greater than 2; anything
# else raises LttbException with the offending value in the message.
def check_threshold(threshold)
  well_defined = threshold.is_a?(Integer) && threshold > 2
  raise LttbException, "threshold not well defined: #{threshold}" unless well_defined
end
check_tuples(data)
click to toggle source
# File lib/lttb/process.rb, line 125
#
# Guard: every datapoint has to be an Array of exactly two elements; the
# first one that is not raises LttbException. Returns data when all pass.
def check_tuples(data)
  data.each do |point|
    pair = point.is_a?(Array) && point.size == 2
    raise LttbException, 'datapoints are not lists or tuples' unless pair
  end
end
handle_dates(data)
click to toggle source
# File lib/lttb/process.rb, line 132
#
# Convert the x value (index 0) of every point into an Integer by formatting
# it with strftime('%Q') and calling to_i on the result.
#
# Returns a new array of converted copies; the points passed in are left
# untouched, so the caller's date objects are not replaced by integers.
def handle_dates(data)
  data.map do |point|
    converted = point.dup
    converted[0] = point[0].strftime('%Q').to_i
    converted
  end
end