module HashingTrickMl::ActsAsVectorized
Public Instance Methods
build_boolean_vector(subset, full_set)
click to toggle source
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 22
# Builds a 0/1 membership vector over +full_set+.
#
# The result has one slot per element of +full_set+; a slot is 1 when the
# element at that position also occurs in +subset+, otherwise 0. Values of
# +subset+ that are not found in +full_set+ are ignored.
#
# @param subset [Enumerable, nil] values to mark; nil is treated as empty
# @param full_set [Array] ordered list of all possible values
# @return [Array<Integer>] vector of 0s and 1s with +full_set.size+ entries
def build_boolean_vector(subset, full_set)
  (subset || []).each_with_object([0] * full_set.size) do |value, result|
    # Array#index returns nil when the value is absent; an Integer (even 0) is truthy.
    index = full_set.index(value)
    result[index] = 1 if index
  end
end
build_exponential_vector(values, dimensions:)
click to toggle source
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 33
# Builds a histogram of +values+ over exponentially growing buckets.
#
# Each non-blank value is converted with +to_f+ and counted in bucket
# floor(log base 1.5 of value), clamped to the range 0..dimensions - 1.
# Values that are zero, negative or NaN are counted in bucket 0.
#
# @param values [Enumerable] raw values; blank entries are skipped
# @param dimensions [Integer] number of buckets in the resulting vector
# @return [Array<Integer>] per-bucket counts with +dimensions+ entries
def build_exponential_vector(values, dimensions:)
  values.reject(&:blank?).each_with_object([0] * dimensions) do |value, result|
    number = value.to_f
    # Math.log raises Math::DomainError for negative input and yields NaN for
    # NaN (which cannot be floored), so only positive numbers are passed to it.
    index = number.positive? ? Math.log(number, 1.5) : 0
    index = 0 if index.negative?
    index = dimensions - 1 if index > dimensions - 1
    result[index.floor] += 1
  end
end
build_fuzzy_vector(statement)
click to toggle source
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 29
# Maps a truthy/falsy value to a numeric flag.
#
# @param statement [Object] any value; only nil and false count as falsy
# @return [Integer] 1 when +statement+ is truthy, otherwise 0
def build_fuzzy_vector(statement)
  return 1 if statement

  0
end
build_maybe_nil_vector(value)
click to toggle source
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 43
# Encodes an optional value as a two-element vector.
#
# The first element is the value itself, or 0 when +presence+ returns nil;
# the second element is 1 when the value is +present?+ and 0 otherwise.
#
# @param value [Object, nil] value that may be missing or blank
# @return [Array] pair of [value or 0, presence flag]
def build_maybe_nil_vector(value)
  filled = value.presence || 0
  marker =
    if value.present?
      1
    else
      0
    end
  [filled, marker]
end
build_word_vector(data, dimensions: self.class.default_dimensions, separator: ' ')
click to toggle source
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 15
# Builds a fixed-size word-count vector using the hashing trick.
#
# The input is passed through +normalize_words+, split on +separator+, and
# each resulting word increments the bucket selected by the last four bytes
# of its SHA2 digest (read as a big-endian unsigned 32-bit integer) modulo
# +dimensions+.
#
# NOTE(review): +normalize_words+ replaces every non-alphanumeric run with a
# space before the split happens, so a non-alphanumeric +separator+ other
# than ' ' looks like it can never match — confirm intended behaviour.
#
# @param data [String, nil] raw text to vectorize
# @param dimensions [Integer] number of buckets in the resulting vector
# @param separator [String] delimiter handed to String#split
# @return [Array<Integer>] per-bucket word counts with +dimensions+ entries
def build_word_vector(data, dimensions: self.class.default_dimensions, separator: ' ')
  buckets = [0] * dimensions
  normalize_words(data).split(separator).each do |token|
    digest_tail = Digest::SHA2.digest(token.downcase).last(4)
    slot = digest_tail.unpack('N1').first % dimensions
    buckets[slot] += 1
  end
  buckets
end
Private Instance Methods
normalize_words(data)
click to toggle source
# File lib/hashing_trick_ml/acts_as_vectorized.rb, line 49
# Reduces raw text to lowercase alphanumeric words separated by single spaces.
#
# The input (or '' when it is nil/false) is passed through +strip_tags+, every
# run of characters that are neither letters nor digits is replaced by one
# space, and the result is downcased. Leading or trailing spaces may remain.
#
# @param data [String, nil] raw text
# @return [String] normalized text
def normalize_words(data)
  plain_text = strip_tags(data || '')
  spaced = plain_text.gsub(/[^[:alpha:][:digit:]]+/, ' ')
  spaced.downcase
end