module HashingTrickMl::ActsAsVectorized

Public Instance Methods

build_boolean_vector(subset, full_set)
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 22
# Encodes membership of +subset+ in +full_set+ as a vector of 0/1 flags.
#
# The result has one slot per element of +full_set+. For every member of
# +subset+, the slot at the first position where +full_set+ holds an equal
# element is set to 1. Members that do not occur in +full_set+ are ignored.
#
# @param subset [Enumerable] the values to flag
# @param full_set [Array] the ordered reference values
# @return [Array<Integer>] flags, +full_set.size+ elements long
def build_boolean_vector(subset, full_set)
  flags = [0] * full_set.size

  subset.each do |member|
    position = full_set.index(member)
    # +index+ yields nil when the member is not part of full_set.
    flags[position] = 1 unless position.nil?
  end

  flags
end
build_exponential_vector(values, dimensions:)
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 33
# Counts values per bucket on a logarithmic (base 1.5) scale.
#
# Entries for which +blank?+ is true are skipped. Every other entry is
# converted with +to_f+ and counted in bucket floor(log base 1.5 of value),
# limited to the range 0..dimensions - 1.
#
# Numbers without a real logarithm (zero, negatives, NaN) are counted in
# bucket 0, the same bucket that values below 1 land in, instead of raising
# Math::DomainError or FloatDomainError.
#
# @param values [Enumerable] entries responding to +blank?+ and +to_f+
# @param dimensions [Integer] length of the returned vector
# @return [Array<Integer>] per-bucket counts, +dimensions+ elements long
def build_exponential_vector(values, dimensions:)
  values.reject(&:blank?).each_with_object([0] * dimensions) do |value, result|
    number = value.to_f
    # Math.log raises Math::DomainError for negative input, so only take the
    # logarithm of strictly positive numbers (NaN is not positive either).
    index = number.positive? ? Math.log(number, 1.5) : 0
    index = 0 if index.negative?
    # Cap before flooring: Float::INFINITY.floor would raise FloatDomainError.
    index = dimensions - 1 if index > dimensions - 1

    result[index.floor] += 1
  end
end
build_fuzzy_vector(statement)
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 29
# Maps Ruby truthiness onto an integer flag.
#
# @param statement [Object] any value; only +nil+ and +false+ count as falsy
# @return [Integer] 1 when +statement+ is truthy, otherwise 0
def build_fuzzy_vector(statement)
  return 1 if statement

  0
end
build_maybe_nil_vector(value)
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 43
# Encodes an optional value as a two-element vector: the value itself (or 0
# as a placeholder) followed by a flag telling whether a value was present.
#
# @param value [Object] any object responding to +present?+
# @return [Array] +[value, 1]+ when +value.present?+ holds, otherwise +[0, 0]+
def build_maybe_nil_vector(value)
  return [0, 0] unless value.present?

  [value, 1]
end
build_word_vector(data, dimensions: self.class.default_dimensions, separator: ' ')
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 15
# Builds a fixed-size word-count vector by hashing each word into a slot.
#
# +data+ is passed through +normalize_words+ and split on +separator+. Each
# resulting word is downcased and digested with Digest::SHA2; the final four
# bytes of the digest, read as a big-endian unsigned 32-bit integer, modulo
# +dimensions+ select the slot whose counter is incremented.
#
# @param data [Object] input handed to +normalize_words+
# @param dimensions [Integer] length of the returned vector
#   (defaults to +self.class.default_dimensions+)
# @param separator [String] delimiter passed to +String#split+
# @return [Array<Integer>] per-slot word counts, +dimensions+ elements long
def build_word_vector(data, dimensions: self.class.default_dimensions, separator: ' ')
  tokens = normalize_words(data).split(separator)
  counts = [0] * dimensions

  tokens.each do |token|
    digest = Digest::SHA2.digest(token.downcase)
    # Last four digest bytes as one big-endian 32-bit unsigned integer.
    tail = digest.byteslice(-4, 4).unpack1('N')
    counts[tail % dimensions] += 1
  end

  counts
end

Private Instance Methods

normalize_words(data)
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 49
# Reduces +data+ to lowercase words separated by single spaces.
#
# A nil/false +data+ is treated as the empty string. The text is first passed
# through +strip_tags+, then every run of characters that are neither letters
# nor digits is collapsed into one space, and the result is downcased.
#
# @param data [String, nil] raw input text
# @return [String] the normalized text
def normalize_words(data)
  plain_text = strip_tags(data || '')
  spaced = plain_text.gsub(/[^[:alpha:][:digit:]]+/, ' ')
  spaced.downcase
end
strip_tags(html)
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 53
# Delegates to the +strip_tags+ helper exposed by
# ActionController::Base.helpers and returns that helper's result unchanged.
#
# NOTE(review): presumably this is the Rails sanitize helper that removes
# HTML markup from +html+ - confirm against the Rails version in use.
#
# @param html [String] text handed to the Rails helper
def strip_tags(html)
  ActionController::Base.helpers.strip_tags(html)
end