module NgramsParser

Parse a word into ngrams

Parse given text into ngrams

Gem version

Constants

VERSION

Public Class Methods

ngram(word, size)

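Split word into ngrams of length size, one starting at each character of the word. Ngrams that run past the end of the word are right-padded with spaces.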

self.ngram("lorem", 2) #=> ["lo", "or", "re", "em", "m "]
self.ngram("ipsum", 3) #=> ["ips", "psu", "sum", "um ", "m  "]
# File lib/ngrams_parser/ngram.rb, line 9
def self.ngram(word, size)
  array = []
  word.split('').each_index do |index|
    text = word[index..index + size - 1]
    array << text.ljust(size, ' ')
  end
  array
end
ngrams(text, size)
# File lib/ngrams_parser/ngrams.rb, line 6
# Splits every word of +text+ into ngrams of +size+ characters.
#
# Word extraction is delegated to LexicalUnits.words_without_digits
# (defined outside this file; presumably it returns the words of the text
# with digit tokens left out -- confirm against the lexical_units gem).
# Each word is then passed to ngram and the results are concatenated.
#
# text - the text to parse.
# size - the Integer length of each ngram.
#
# Returns a single flat Array containing the ngrams of all words, in word
# order.
def self.ngrams(text, size)
  LexicalUnits.words_without_digits(text).flat_map { |word| ngram(word, size) }
end