class ObjectHash

A content hash algorithm which works across multiple encodings (JSON, Protobufs, etc.)

Constants

DEFAULT_HASH_ALGORITHM

Default algorithm to use for computing object hashes

VERSION

Public Class Methods

digest(object, normalize: true) click to toggle source

Compute a raw ObjectHash digest using the default algorithm

# File lib/objecthash.rb, line 15
# Class-level shortcut: builds an instance with the default constructor
# argument and asks it for the raw digest of +object+.
#
# object    - the value to hash.
# normalize - passed through unchanged to the instance-level #digest.
#
# Returns whatever the instance-level #digest returns.
def self.digest(object, normalize: true)
  hasher = new
  hasher.digest(object, normalize: normalize)
end
hexdigest(object, normalize: true) click to toggle source

Compute a hex ObjectHash digest using the default algorithm

# File lib/objecthash.rb, line 20
# Class-level shortcut: builds an instance with the default constructor
# argument and asks it for the hexadecimal digest of +object+.
#
# object    - the value to hash.
# normalize - passed through unchanged to the instance-level #hexdigest.
#
# Returns whatever the instance-level #hexdigest returns.
def self.hexdigest(object, normalize: true)
  hasher = new
  hasher.hexdigest(object, normalize: normalize)
end
new(hash_algorithm = DEFAULT_HASH_ALGORITHM) click to toggle source
# File lib/objecthash.rb, line 24
# Remember which hash algorithm this instance should use.
#
# hash_algorithm - handle stored in @hash_algorithm; defaults to
#                  DEFAULT_HASH_ALGORITHM. hash_primitive later either calls
#                  it (when it is a Proc) or sends it +new+ to obtain a
#                  fresh hasher object.
def initialize(hash_algorithm = DEFAULT_HASH_ALGORITHM)
  @hash_algorithm = hash_algorithm
end

Public Instance Methods

digest(object, normalize: true) click to toggle source

Compute the ObjectHash of the given object

# File lib/objecthash.rb, line 29
# Compute the raw ObjectHash digest of +object+ by dispatching on its type.
#
# object    - an Array, Hash, String, Symbol, Float, Integer, Set, true,
#             false or nil.
# normalize - when true (the default) a String or Symbol is NFC-normalized
#             before hashing. The flag is only consumed here for a
#             top-level String/Symbol; the container helpers are invoked
#             without it.
#
# Returns the value produced by the matching obj_hash_* helper.
# Raises TypeError for any other kind of object.
def digest(object, normalize: true)
  case object
  when Array       then obj_hash_list(object)
  when Hash        then obj_hash_dict(object)
  when String      then obj_hash_unicode(object, normalize)
  when Symbol      then obj_hash_unicode(object.to_s, normalize)
  when Float       then obj_hash_float(object)
  # Integer instead of the legacy Fixnum constant: Fixnum no longer exists on
  # current Rubies (referencing it raises NameError, which also broke every
  # arm below this one), and Integer additionally covers arbitrarily large
  # values.
  when Integer     then obj_hash_int(object)
  when Set         then obj_hash_set(object)
  when true, false then obj_hash_bool(object)
  when nil         then hash_primitive("n", "")
  else             raise TypeError, "unsupported: #{object.class}"
  end
end
hexdigest(object, normalize: true) click to toggle source

Compute the ObjectHash of the given object as hexadecimal

# File lib/objecthash.rb, line 45
# Compute the ObjectHash of +object+ and render it as a hexadecimal string.
#
# object    - the value to hash.
# normalize - passed through unchanged to #digest.
#
# Returns the bytes produced by #digest as a hex String (high nibble first).
def hexdigest(object, normalize: true)
  raw_bytes = digest(object, normalize: normalize)
  hex_strings = raw_bytes.unpack("H*")
  hex_strings[0]
end

Private Instance Methods

float_normalize(f) click to toggle source

rubocop:disable Metrics/AbcSize rubocop:disable Metrics/CyclomaticComplexity

# File lib/objecthash.rb, line 82
# Render a float as the canonical text form "<sign><exponent>:<bits>".
#
# The magnitude is halved or doubled until it lies in (0.5, 1]; the number of
# halvings (counted positive) or doublings (counted negative) becomes the
# exponent, and the remaining value is then emitted one binary digit at a
# time. Zero is special-cased as "+0:".
#
# f - the number to encode (the only caller visible here passes a Float).
#
# Returns a String, e.g. "+0:1" for 1.0 and "+1:011" for 1.5.
# Raises RuntimeError for NaN and +/-Infinity, or if an internal invariant
# is violated.
def float_normalize(f)
  # special case 0
  # Note that if we allowed f to end up > .5 or == 0, we'd get the same thing
  return "+0:" if f == 0.0

  # Halving an infinity never brings it down to 1, so the exponent loop below
  # would never terminate; NaN would only trip an anonymous invariant check.
  # Reject both up front with a descriptive error.
  raise "cannot normalize non-finite float: #{f}" if f.is_a?(Float) && !f.finite?

  # sign
  sign = "+"
  if f < 0
    sign = "-"
    f = -f
  end

  # exponent
  exponent = 0
  while f > 1
    f /= 2
    exponent += 1
  end

  while f <= 0.5
    f *= 2
    exponent -= 1
  end

  # dup guarantees a mutable buffer regardless of frozen-string settings
  s = "#{sign}#{exponent}:".dup

  # mantissa
  raise "float normalization failed: mantissa above 1" unless f <= 1
  raise "float normalization failed: mantissa not above 0.5" unless f > 0.5

  while f != 0.0
    if f >= 1
      s << "1"
      f -= 1
    else
      s << "0"
    end

    raise "float normalization failed: remainder not below 1" unless f < 1
    # safety net against a runaway digit loop
    raise "float normalization failed: output too long" unless s.length < 1000
    f *= 2
  end

  s
end
hash_primitive(t, b) click to toggle source
# File lib/objecthash.rb, line 51
# Hash a type tag followed by a payload with the configured algorithm.
#
# A fresh hasher is obtained from @hash_algorithm on every call: a Proc is
# called, anything else is sent +new+.
#
# t - the tag fed to the hasher first.
# b - the payload fed to the hasher second.
#
# Returns the result of the hasher's +digest+ method.
def hash_primitive(t, b)
  hasher =
    if @hash_algorithm.is_a?(Proc)
      @hash_algorithm.call
    else
      @hash_algorithm.new
    end

  hasher.update(t)
  hasher.update(b)
  hasher.digest
end
obj_hash_bool(b) click to toggle source
# File lib/objecthash.rb, line 58
# Hash a boolean: a truthy value is encoded as "1", a falsy one as "0",
# under the "b" tag.
#
# b - the value to encode.
#
# Returns the result of hash_primitive.
def obj_hash_bool(b)
  encoded =
    if b
      "1"
    else
      "0"
    end
  hash_primitive("b", encoded)
end
obj_hash_dict(d) click to toggle source
# File lib/objecthash.rb, line 67
# Hash a Hash: each entry contributes the digest of its key immediately
# followed by the digest of its value; the per-entry strings are sorted,
# concatenated, and hashed under the "d" tag.
#
# d - the Hash to encode.
#
# Returns the result of hash_primitive.
def obj_hash_dict(d)
  entry_digests = d.map do |key, value|
    digest(key) + digest(value)
  end
  payload = entry_digests.sort.join
  hash_primitive("d", payload)
end
obj_hash_float(f) click to toggle source

rubocop:enable Metrics/AbcSize rubocop:enable Metrics/CyclomaticComplexity

# File lib/objecthash.rb, line 131
# Hash a float: its float_normalize text form is hashed under the "f" tag.
#
# f - the float to encode.
#
# Returns the result of hash_primitive.
def obj_hash_float(f)
  normalized = float_normalize(f)
  hash_primitive("f", normalized)
end
obj_hash_int(i) click to toggle source
# File lib/objecthash.rb, line 135
# Hash an integer: its +to_s+ representation is hashed under the "i" tag.
#
# i - the integer to encode.
#
# Returns the result of hash_primitive.
def obj_hash_int(i)
  decimal = i.to_s
  hash_primitive("i", decimal)
end
obj_hash_list(l) click to toggle source
# File lib/objecthash.rb, line 62
# Hash a list: the digests of the elements are concatenated in their
# original order and hashed under the "l" tag.
#
# l - the Array to encode.
#
# Returns the result of hash_primitive.
def obj_hash_list(l)
  element_digests = l.map { |element| digest(element) }
  hash_primitive("l", element_digests.join)
end
obj_hash_set(s) click to toggle source
# File lib/objecthash.rb, line 139
# Hash a set: the digests of the members are sorted, concatenated, and
# hashed under the "s" tag, so the result does not depend on iteration order.
#
# s - the Set (or other enumerable) whose members are to be encoded.
#
# Returns the result of hash_primitive.
def obj_hash_set(s)
  # Collect each member's digest. The members themselves must not be touched:
  # appending the digest to a member mutates caller data and fails outright
  # for frozen strings, integers, nil and booleans.
  member_digests = s.map { |e| digest(e) }
  hash_primitive("s", member_digests.sort.join)
end
obj_hash_unicode(u, n) click to toggle source

Takes a unicode string and a boolean to indicate whether to normalize unicode or not.

# File lib/objecthash.rb, line 74
# Hash a string: it is transcoded to UTF-8, optionally NFC-normalized, and
# hashed under the "u" tag.
#
# u - the String to encode.
# n - truthy to apply Unicode NFC normalization, falsy to hash the UTF-8
#     text as-is.
#
# Returns the result of hash_primitive.
def obj_hash_unicode(u, n)
  utf8_text = u.encode("utf-8")
  payload =
    if n
      utf8_text.unicode_normalize(:nfc)
    else
      utf8_text
    end
  hash_primitive("u", payload)
end