class SamplingHash::Hash
Public Class Methods
new(size, seed = size, sampler = nil, xxhash = XXhash::XXhashInternal::StreamingHash64.new(seed))
click to toggle source
# File lib/sampling-hash/hash.rb, line 3
# Prepares the streaming state and selects the first sample.
#
# @param size    handed to Sampler.new when no sampler is supplied; also
#                the default value of +seed+
# @param seed    used only to build the default +xxhash+ object
# @param sampler object whose #samples list drives the sampling; a
#                Sampler is created from +size+ when this is nil/false
# @param xxhash  hashing object that receives the sampled data
def initialize(size, seed = size, sampler = nil, xxhash = XXhash::XXhashInternal::StreamingHash64.new(seed))
  # Collaborators.
  @xxhash = xxhash
  @sampler = sampler || Sampler.new(size)

  # Stream position and index of the next sample to fetch both start at zero.
  @position = 0
  @next = 0

  # State of the sample currently being collected.
  @current_sample = nil       # collected data
  @current_sample_offset = 0  # where the sample starts within the stream
  @current_sample_size = 0    # how much data the sample holds

  # Load the first sample description.
  next_sample
end
Public Instance Methods
digest()
click to toggle source
# File lib/sampling-hash/hash.rb, line 30
# Returns whatever the underlying hashing object reports as its digest.
#
# @return the result of calling #digest on the xxhash object
def digest
  hasher = @xxhash
  hasher.digest
end
update(chunk)
click to toggle source
# File lib/sampling-hash/hash.rb, line 20
# Feeds one chunk of the stream into the hash.
#
# The chunk is walked front to back; each call to #advance reports how
# much of the remainder it consumed, and both the chunk cursor and the
# overall stream position move forward by that amount.
#
# @param chunk [String] the next piece of the data stream
# @return [nil]
def update(chunk)
  cursor = 0
  until cursor >= chunk.size
    remaining = chunk.size - cursor
    consumed = advance(chunk, cursor, remaining)
    @position += consumed
    cursor += consumed
  end
end
Private Instance Methods
advance(chunk, pos, len)
click to toggle source
# File lib/sampling-hash/hash.rb, line 36
# Consumes the front of chunk[pos, len] according to the current sampling
# state and reports how much was consumed.
#
# * Inside a sample: the data is appended to the sample; when it completes
#   the sample, the sample is finished and only the part that was still
#   missing is consumed.
# * Before the next sample: the gap is skipped, at most +len+ at a time.
# * No samples left: everything is discarded.
#
# @param chunk [String] the chunk currently being processed
# @param pos   [Integer] index within +chunk+ where unprocessed data starts
# @param len   [Integer] amount of unprocessed data from +pos+ onwards
# @return [Integer] amount consumed; 0 only when an unusable sample was
#   skipped, in which case the caller simply retries with the next sample
def advance(chunk, pos, len)
  if in_sample?
    # Use some bytes.
    msb = missing_sample_bytes
    if msb > len
      # The (start, length) slice form cannot wrap around the way
      # pos..(pos + len - 1) does when it degenerates to 0..-1.
      update_sample chunk[pos, len]
      len
    else
      finish_sample chunk[pos, msb]
      msb
    end
  elsif samples_left?
    # Discard some bytes until the next sample starts.
    mgb = missing_gap_bytes
    if mgb <= 0
      # The pending sample is empty or starts behind the stream position.
      # Reporting a zero or negative amount as consumed would stall or
      # rewind the caller's loop, so skip the sample and consume nothing.
      next_sample
      0
    else
      [mgb, len].min
    end
  else
    # Discard the rest.
    len
  end
end
finish_sample(data)
click to toggle source
# File lib/sampling-hash/hash.rb, line 81
# Completes the current sample: appends the final piece of data, feeds the
# whole sample to the hashing object and moves on to the next sample.
#
# @param data [String] the last part of the current sample
# @return whatever #next_sample returns
def finish_sample(data)
  # Append in place; += would copy the whole buffer into a new String.
  @current_sample << data
  @xxhash.update(@current_sample)
  next_sample
end
in_sample?()
click to toggle source
# File lib/sampling-hash/hash.rb, line 61
# Tells whether the stream position currently lies inside the pending
# sample, i.e. within [offset, offset + size).
#
# @return [Boolean] false when no sample is pending
def in_sample?
  return false unless samples_left?

  sample_end = @current_sample_offset + @current_sample_size
  @position >= @current_sample_offset && @position < sample_end
end
missing_gap_bytes()
click to toggle source
# File lib/sampling-hash/hash.rb, line 73
# Distance from the current stream position to the start of the pending
# sample.
#
# @return [Integer] sample offset minus stream position
def missing_gap_bytes
  sample_start = @current_sample_offset
  sample_start - @position
end
missing_sample_bytes()
click to toggle source
# File lib/sampling-hash/hash.rb, line 69
# How much data the pending sample still needs before it is complete.
#
# NOTE(review): the collected amount is measured with String#length, which
# counts characters rather than bytes for multi-byte encodings - confirm
# that callers only pass binary data.
#
# @return [Integer] sample size minus the amount collected so far
def missing_sample_bytes
  collected = @current_sample.length
  @current_sample_size - collected
end
next_sample()
click to toggle source
# File lib/sampling-hash/hash.rb, line 87
# Moves on to the next entry of the sampler's sample list.
#
# When an entry is available, a fresh empty buffer is installed, the
# entry's two values become the sample offset and size, and the sample
# index is incremented. When the list is exhausted the buffer is set to
# nil, which is what #samples_left? checks.
#
# @return [Integer, nil] the incremented sample index, or nil when no
#   samples remain
def next_sample
  if @next >= @sampler.samples.size
    @current_sample = nil
  else
    @current_sample = String.new
    offset, size = @sampler.samples[@next]
    @current_sample_offset = offset
    @current_sample_size = size
    @next += 1
  end
end
samples_left?()
click to toggle source
# File lib/sampling-hash/hash.rb, line 65
# Tells whether a sample is still pending, i.e. whether a sample buffer
# is currently installed.
#
# @return [Boolean]
def samples_left?
  @current_sample ? true : false
end
update_sample(data)
click to toggle source
# File lib/sampling-hash/hash.rb, line 77
# Appends data to the sample that is currently being collected.
#
# @param data [String] the next part of the current sample
# @return [String] the sample buffer after appending
def update_sample(data)
  # Append in place; += would copy the whole buffer into a new String on
  # every call.
  @current_sample << data
end