class SimString::NGramBuilder
Constants
- SENTINAL_CHAR
Attributes
n[RW]
Public Class Methods
new(n)
click to toggle source
# File lib/simstring_pure.rb, line 19
#
# Creates a builder and records the requested n-gram length.
#
# n - value stored in the +n+ attribute; #features uses it as the size of
#     each n-gram window and to decide how much sentinel padding to add.
def initialize(n)
  # Assign through the public writer rather than the ivar directly, so the
  # attribute's setter is what actually runs.
  self.n = n
end
Public Instance Methods
features(string)
click to toggle source
# File lib/simstring_pure.rb, line 23
#
# Builds the set of numbered n-gram features for +string+.
#
# The input is padded on both sides with (n - 1) copies of SENTINAL_CHAR,
# split into every run of n consecutive characters, and each distinct
# n-gram is then numbered 1..count, where count is how many times it occurs
# in the padded string.
#
# string - the text to extract features from.
#
# Returns a Set of NGram objects, one per (n-gram text, occurrence number).
def features(string)
  padding = SENTINAL_CHAR * (n - 1)
  padded = padding + string + padding

  # Every window of n consecutive characters, joined back into a string.
  grams = padded.chars.each_cons(n).map(&:join)

  # Occurrence count per distinct n-gram, in first-seen order.
  occurrences = grams.each_with_object(Hash.new(0)) do |gram, counts|
    counts[gram] += 1
  end

  # Repeated n-grams become separate features numbered 1, 2, ..., count.
  numbered = occurrences.flat_map do |gram, count|
    1.upto(count).map { |index| NGram.new(gram, index) }
  end

  numbered.to_set
end